示例#1
0
        /// <summary>
        ///     Builds LC-MS features from the supplied MS features and filters the result.
        /// </summary>
        /// <param name="information">Dataset information, passed to the max-scan-length validation.</param>
        /// <param name="msFeatures">MS features to build from; must contain at least one entry.</param>
        /// <param name="options">Feature finding options handed to the feature finder.</param>
        /// <param name="filterOptions">Options used when filtering the found features.</param>
        /// <param name="provider">Scan summary provider handed to the finder and to the filter.</param>
        /// <param name="progress">Optional progress reporter forwarded to the feature finder.</param>
        /// <returns>The features that remain after filtering.</returns>
        /// <exception cref="Exception">Thrown when <paramref name="msFeatures"/> is empty.</exception>
        public List<UMCLight> CreateLcmsFeatures(
            DatasetInformation information,
            List<MSFeatureLight> msFeatures,
            LcmsFeatureFindingOptions options,
            LcmsFeatureFilteringOptions filterOptions,
            IScanSummaryProvider provider,
            IProgress<ProgressData> progress = null)
        {
            // Nothing to build from: fail before any work is started.
            var hasInput = msFeatures.Count >= 1;
            if (!hasInput)
            {
                throw new Exception("No features were found in the feature files provided.");
            }

            UpdateStatus("Finding features.");
            ValidateFeatureFinderMaxScanLength(information, options, filterOptions);

            // Relay the finder's own progress messages through this object's status channel.
            var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            featureFinder.Progress += (sender, args) => UpdateStatus(args.Message);

            var unfiltered = featureFinder.FindFeatures(msFeatures, options, provider, progress);

            UpdateStatus("Filtering features.");
            List<UMCLight> kept = LcmsFeatureFilters.FilterFeatures(unfiltered, filterOptions, provider);

            var summaryMessage = string.Format("Filtered features from: {0} to {1}.", unfiltered.Count, kept.Count);
            UpdateStatus(summaryMessage);

            return kept;
        }
示例#2
0
        /// <summary>
        ///     Filters the list of MS Features that may be from MS/MS deisotoped data, keeping
        ///     only the features whose scan the provider reports as MS level 1.
        /// </summary>
        /// <param name="msFeatures">MS features to filter.</param>
        /// <param name="provider">Scan summary provider used to look up the MS level and time of each scan.</param>
        /// <param name="dataset">
        ///     Dataset the features belong to. Its <c>ScanTimes</c> are updated with the time of
        ///     every scan for which the provider returns a summary.
        /// </param>
        /// <returns>
        ///     <paramref name="msFeatures"/> itself when the dataset has no raw file path;
        ///     otherwise a new list containing only the features from MS level 1 scans.
        /// </returns>
        public List<MSFeatureLight> Filter(List<MSFeatureLight> msFeatures, IScanSummaryProvider provider, ref DatasetInformation dataset)
        {
            var rawPath = dataset.RawFile.Path;

            // IsNullOrWhiteSpace already covers the null case; without a raw file nothing can be filtered.
            if (string.IsNullOrWhiteSpace(rawPath))
            {
                return msFeatures;
            }

            var rawFileName = System.IO.Path.GetFileName(rawPath);

            // Scans confirmed to be MS level 1.
            var fullScans = new HashSet<int>();

            if (provider == null)
            {
                // NOTE(review): with no provider no scan can be confirmed as MS1, so the result
                // below is an empty list even though this is only reported as a warning.
                // Confirm that dropping every feature is the intended outcome.
                UpdateStatus(string.Format("Warning: Raw file not found ({0}); scan times are not available!", rawFileName));
            }
            else
            {
                UpdateStatus(string.Format("Reading scan info from {0}", rawFileName));

                var scanTimes = dataset.ScanTimes;

                // Query each distinct scan once, in order of first appearance.
                foreach (var scan in msFeatures.Select(x => x.Scan).Distinct())
                {
                    var summary = provider.GetScanSummary(scan);
                    if (summary == null)
                    {
                        continue;
                    }

                    if (summary.MsLevel == 1)
                    {
                        fullScans.Add(scan);
                    }

                    // The indexer adds the entry or overwrites an existing one.
                    scanTimes[scan] = summary.Time;
                }

                dataset.ScanTimes = scanTimes;
            }

            return msFeatures.Where(x => fullScans.Contains(x.Scan)).ToList();
        }
示例#3
0
        /// <summary>
        ///     Stores the scan summary provider and options, then creates the first- and
        ///     second-pass clusterers selected by those options.
        /// </summary>
        /// <param name="provider">Scan summary provider. Must not be null.</param>
        /// <param name="options">
        ///     Feature finding options; when null a default <see cref="LcmsFeatureFindingOptions"/> is used.
        /// </param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="provider"/> is null.</exception>
        public MsToLcmsFeatures(IScanSummaryProvider provider, LcmsFeatureFindingOptions options = null)
        {
            if (provider == null)
            {
                // Name the offending argument so the exception message is actionable.
                throw new ArgumentNullException(nameof(provider));
            }

            this.provider = provider;
            this.options  = options ?? new LcmsFeatureFindingOptions();

            // First pass: MS features -> UMCs. The tree clusterer is built here with
            // m/z ordering and a ppm m/z difference; any other type comes from the factory.
            if (this.options.FirstPassClusterer == MsFeatureClusteringAlgorithmType.BinarySearchTree)
            {
                Comparison<MSFeatureLight> mzSort = (x, y) => x.Mz.CompareTo(y.Mz);
                Func<MSFeatureLight, MSFeatureLight, double> mzDiff =
                    (x, y) => FeatureLight.ComputeMassPPMDifference(x.Mz, y.Mz);

                this.firstPassClusterer = new MsFeatureTreeClusterer<MSFeatureLight, UMCLight>(
                    mzSort,
                    mzDiff,
                    MassComparison.Mz,
                    this.options.InstrumentTolerances.Mass);
            }
            else
            {
                this.firstPassClusterer = ClusterFactory.Create(this.options.FirstPassClusterer);
            }

            // Second pass: UMCs -> UMCs, ordered and compared by monoisotopic mass.
            if (this.options.SecondPassClusterer == GenericClusteringAlgorithmType.BinarySearchTree)
            {
                Comparison<UMCLight> monoSort = (x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic);
                Func<UMCLight, UMCLight, double> monoDiff =
                    (x, y) => FeatureLight.ComputeMassPPMDifference(x.MassMonoisotopic, y.MassMonoisotopic);

                this.secondPassClusterer = new MsFeatureTreeClusterer<UMCLight, UMCLight>(
                    monoSort,
                    monoDiff,
                    MassComparison.Monoisotopic,
                    this.options.InstrumentTolerances.Mass);
            }
            else
            {
                var clusterFactory = new GenericClusterFactory<UMCLight, UMCLight>();
                this.secondPassClusterer = clusterFactory.Create(this.options.SecondPassClusterer);
            }
        }
示例#4
0
        /// <summary>
        ///     Reads MS features from a comma-delimited file, runs the UMC tree feature finder
        ///     on them and asserts on the number of features found.
        /// </summary>
        /// <param name="relativePath">Path of the MS feature file, resolved through <c>GetPath</c>.</param>
        /// <param name="expectedFeatureCount">Number of features the finder is expected to produce.</param>
        /// <returns>The features produced by the finder.</returns>
        public IEnumerable<UMCLight> TestUmcFeatures(string relativePath, int expectedFeatureCount)
        {
            // Resolve the input file and load its MS features.
            var absolutePath = GetPath(relativePath);

            var featureReader = new MsFeatureLightFileReader();
            featureReader.Delimiter = ',';
            var loadedMsFeatures = featureReader.ReadFile(absolutePath);

            var featureFinder = new UmcTreeFeatureFinder();
            featureFinder.MaximumNet  = .005;
            featureFinder.MaximumScan = 50;

            var featureTolerances = new FeatureTolerances();
            featureTolerances.Mass = 8;
            featureTolerances.Net  = .005;

            var findingOptions = new LcmsFeatureFindingOptions(featureTolerances);

            // The raw file path is derived from the feature file name.
            var rawFilePath = absolutePath.Replace("_isos.csv", ".raw");

            UpdateStatus("Using raw data to create better features.");

            var cache = new ScanSummaryProviderCache();
            IScanSummaryProvider scanProvider = cache.GetScanSummaryProvider(rawFilePath, 1);

            var found = featureFinder.FindFeatures(loadedMsFeatures.ToList(), findingOptions, scanProvider);

            // At least one feature must be found, and the total must match the expectation.
            Assert.Greater(found.Count, 0);
            Assert.AreEqual(expectedFeatureCount, found.Count);

            return found;
        }
示例#5
0
        /// <summary>
        ///     Filters features by elution length and keeps only those with positive abundance.
        /// </summary>
        /// <remarks>
        ///     Length is measured in scans when no provider is given or
        ///     <c>options.FilterOnMinutes</c> is false. Otherwise it is measured as a scan-time
        ///     difference taken from the provider, and the minimum number of data points is
        ///     enforced as well.
        /// </remarks>
        /// <typeparam name="T">Feature type, derived from <see cref="UMCLight"/>.</typeparam>
        /// <param name="features">Features to filter.</param>
        /// <param name="options">Filtering options supplying the length ranges and minimum data points.</param>
        /// <param name="scanSummaryProvider">Optional provider of scan times.</param>
        /// <returns>A new list with the features that pass the filter.</returns>
        /// <exception cref="IndexOutOfRangeException">
        ///     Thrown when the elution time of a feature cannot be determined; the original
        ///     failure is available as <c>InnerException</c>.
        /// </exception>
        public static List<T> FilterFeatures<T>(List<T> features, LcmsFeatureFilteringOptions options, IScanSummaryProvider scanSummaryProvider = null)
            where T : UMCLight
        {
            IEnumerable<T> newFeatures;

            if (scanSummaryProvider == null || !options.FilterOnMinutes)
            {
                var minimumSize = options.FeatureLengthRangeScans.Minimum;
                var maximumSize = options.FeatureLengthRangeScans.Maximum;

                // Length measured in scans.
                newFeatures = features.Where(x =>
                {
                    var size = Math.Abs(x.ScanStart - x.ScanEnd);
                    return size >= minimumSize && size <= maximumSize;
                });
            }
            else
            {
                var minimumSize   = options.FeatureLengthRangeMinutes.Minimum;
                var maximumSize   = options.FeatureLengthRangeMinutes.Maximum;
                var minimumPoints = options.MinimumDataPoints;

                // Length measured as a scan-time difference.
                newFeatures = features.Where(x =>
                {
                    try
                    {
                        double size;
                        if (x.ScanStart == 0)
                        {
                            // Scan 0 won't show up in the scan times, so the feature length is
                            // just the time of the last feature scan.
                            size = scanSummaryProvider.GetScanSummary(x.ScanEnd).Time;
                        }
                        else
                        {
                            size = Math.Abs(scanSummaryProvider.GetScanSummary(x.ScanEnd).Time -
                                            scanSummaryProvider.GetScanSummary(x.ScanStart).Time);
                        }

                        return size >= minimumSize && size <= maximumSize && x.Features.Count >= minimumPoints;
                    }
                    catch (Exception ex)
                    {
                        // Keep the exception type callers already see, but preserve the root
                        // cause and its stack trace as the inner exception.
                        throw new IndexOutOfRangeException(
                            String.Format("Exception determining the elution time for scans {0} and {1}: {2}", x.ScanStart, x.ScanEnd, ex.Message),
                            ex);
                    }
                });
            }

            // The queries above are deferred; ToList runs them here, so any failure
            // surfaces from this call.
            return newFeatures.Where(x => x.Abundance > 0).ToList();
        }
示例#6
0
        /// <summary>
        ///     Finds LC-MS features by clustering the MS features, then gives every non-empty
        ///     feature a NET, statistics, a sequential ID and peak-based scan boundaries.
        /// </summary>
        /// <param name="msFeatures">MS features to cluster; also used to determine the scan range.</param>
        /// <param name="options">Feature finding options forwarded to the clusterer.</param>
        /// <param name="provider">Scan summary provider used to look up scan times. Must not be null.</param>
        /// <param name="progress">Optional progress reporter forwarded to the clusterer.</param>
        /// <returns>The features that contain at least one MS feature.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="provider"/> is null.</exception>
        public List<UMCLight> FindFeatures(List<MSFeatureLight> msFeatures,
                                           LcmsFeatureFindingOptions options, IScanSummaryProvider provider,
                                           IProgress<ProgressData> progress = null)
        {
            if (provider == null)
            {
                throw new ArgumentNullException(nameof(provider));
            }

            // A point at or below this fraction of the apex abundance marks a peak edge.
            const double peakEdgeFraction = 0.05;

            var clusterer = new MsToLcmsFeatures(provider, options);
            var features  = clusterer.Convert(msFeatures, progress);

            // Scan range covered by the input, used to normalize elution times.
            var minScan = int.MaxValue;
            var maxScan = int.MinValue;

            foreach (var msFeature in msFeatures)
            {
                minScan = Math.Min(msFeature.Scan, minScan);
                maxScan = Math.Max(msFeature.Scan, maxScan);
            }

            var minScanTime = provider.GetScanSummary(minScan).Time;
            var maxScanTime = provider.GetScanSummary(maxScan).Time;

            // NOTE(review): when every MS feature comes from the same scan this range is zero
            // and the NET below is not a finite number; confirm how callers want that handled.
            var scanTimeRange = maxScanTime - minScanTime;

            var id          = 0;
            var newFeatures = new List<UMCLight>();

            foreach (var feature in features)
            {
                // Clusters without MS features carry no data and are left out of the result.
                if (feature.MsFeatures.Count < 1)
                {
                    continue;
                }

                feature.Net = (provider.GetScanSummary(feature.Scan).Time - minScanTime) / scanTimeRange;
                feature.CalculateStatistics();
                feature.Id = id++;
                newFeatures.Add(feature);

                // Sets the width of the feature to be the width of the peak, not the width of the tails.
                var members = feature.MsFeatures;

                // Locate the apex. Every member is examined, including the last one.
                var maxAbundance      = double.MinValue;
                var maxAbundanceIndex = 0;
                for (var msFeatureIndex = 0; msFeatureIndex < members.Count; msFeatureIndex++)
                {
                    var candidate = members[msFeatureIndex];
                    if (candidate.Abundance > maxAbundance)
                    {
                        maxAbundance      = candidate.Abundance;
                        maxAbundanceIndex = msFeatureIndex;
                    }
                }

                // Walk from the apex towards the start until the abundance drops to the edge level.
                // NOTE(review): index 0 and the last index are never examined by these two walks,
                // presumably because they already bound the feature - confirm.
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex > 0; msFeatureIndex--)
                {
                    if (members[msFeatureIndex].Abundance / maxAbundance <= peakEdgeFraction)
                    {
                        feature.ScanStart = members[msFeatureIndex].Scan;
                        break;
                    }
                }

                // Walk from the apex towards the end in the same way.
                for (var msFeatureIndex = maxAbundanceIndex; msFeatureIndex < members.Count - 1; msFeatureIndex++)
                {
                    if (members[msFeatureIndex].Abundance / maxAbundance <= peakEdgeFraction)
                    {
                        feature.ScanEnd = members[msFeatureIndex].Scan;
                        break;
                    }
                }
            }

            // Return the processed features only; empty clusters never received a NET or an ID.
            return newFeatures;
        }
示例#7
0
        /// <summary>
        ///     Loads LC-MS features for a dataset, choosing the reader from the extension of
        ///     the dataset's feature file.
        /// </summary>
        /// <param name="dataset">Dataset whose feature file path and ID are used.</param>
        /// <param name="featureCache">Feature cache queried by dataset ID.</param>
        /// <param name="provider">Optional scan summary provider; some readers require a specific provider type.</param>
        /// <returns>
        ///     The loaded features. The list is empty when the extension cannot be determined
        ///     or when a .ms1ft file is given without an <c>InformedProteomicsReader</c> provider.
        /// </returns>
        public static IList<UMCLight> LoadUmcFeatureData(DatasetInformation dataset, IUmcDAO featureCache, IScanSummaryProvider provider = null)
        {
            var features    = new List<UMCLight>();
            var featurePath = dataset.Features.Path;
            var extension   = Path.GetExtension(featurePath);

            if (extension == null)
            {
                return features;
            }

            var upperExtension = extension.ToUpper();

            if (upperExtension == ".TXT")
            {
                // Two text formats exist; they are told apart by the file name suffix.
                if (featurePath.EndsWith("_LCMSFeatures.txt"))
                {
                    var lcImsReader = new LcImsFeatureFileReader(provider, dataset.DatasetId);
                    features = lcImsReader.ReadFile(featurePath).ToList();
                }
                else
                {
                    var lcmsReader = new LCMSFeatureFileReader(featurePath);
                    features = lcmsReader.GetUmcList();
                }
            }
            else if (upperExtension == ".MS1FT")
            {
                // This reader is only created for an InformedProteomicsReader provider.
                var informedReader = provider as InformedProteomicsReader;
                if (informedReader != null)
                {
                    var promexReader = new PromexFileReader(informedReader, dataset.DatasetId);
                    features = promexReader.ReadFile(featurePath).ToList();
                }
            }
            else
            {
                // ".DB3" and every other extension read from the feature cache. Features were
                // being reconstructed from scratch even when already cached because the file
                // extension was ".csv" rather than ".db3".
                features = featureCache.FindByDatasetId(dataset.DatasetId);
            }

            // Attach MS/MS data when the provider can supply spectra.
            var spectraProvider = provider as ISpectraProvider;
            if (features != null && spectraProvider != null)
            {
                LoadMsMs(features, spectraProvider);
            }

            return features;
        }
示例#8
0
 /// <summary>
 ///     Creates a reader and stores the scan summary provider and dataset ID it was given.
 /// </summary>
 /// <param name="provider">Optional scan summary provider.</param>
 /// <param name="datasetId">ID of the dataset being read; defaults to 0.</param>
 public LcImsFeatureFileReader(IScanSummaryProvider provider = null, int datasetId = 0)
 {
     this.datasetId = datasetId;
     this.provider  = provider;
 }
示例#9
0
 /// <summary>
 ///     Links MS features to MSn spectra by delegating to the two-argument overload.
 /// </summary>
 /// <param name="features">MS features to link.</param>
 /// <param name="fragmentSpectra">Fragmentation spectra to link the features to.</param>
 /// <param name="provider">Not used by this overload.</param>
 /// <returns>The mapping produced by the two-argument overload.</returns>
 public Dictionary<int, int> LinkMSFeaturesToMSn(List<MSFeatureLight> features,
                                                 List<MSSpectra> fragmentSpectra,
                                                 IScanSummaryProvider provider)
 {
     var links = LinkMSFeaturesToMSn(features, fragmentSpectra);

     return links;
 }
示例#10
0
        /// <summary>
        ///     Load a single dataset from the provider.
        /// </summary>
        /// <remarks>
        ///     Existing LC-MS features are loaded first. When none are available they are
        ///     created from the dataset's MS features, and each created feature gets an ID,
        ///     a NET based on scan times and the dataset's group ID. When a sequence file is
        ///     present, identifications are linked to the collected MSn spectra.
        /// </remarks>
        /// <param name="dataset">Dataset to load.</param>
        /// <param name="msFilteringOptions">Options for filtering MS features.</param>
        /// <param name="lcmsFindingOptions">Options for finding LC-MS features.</param>
        /// <param name="lcmsFilteringOptions">Options for filtering LC-MS features.</param>
        /// <param name="dataLoadOptions">Options supplying the isos filter settings.</param>
        /// <param name="providerCache">Cache of scan summary providers, queried when the dataset has a raw file path.</param>
        /// <param name="identificationProviders">Cache of identification providers, queried when the dataset has a sequence file.</param>
        /// <param name="progress">Optional progress reporter.</param>
        /// <returns>The LC-MS features of the dataset.</returns>
        public IList<UMCLight> LoadDataset(DatasetInformation dataset,
                                           MsFeatureFilteringOptions msFilteringOptions,
                                           LcmsFeatureFindingOptions lcmsFindingOptions,
                                           LcmsFeatureFilteringOptions lcmsFilteringOptions,
                                           DataLoadingOptions dataLoadOptions,
                                           ScanSummaryProviderCache providerCache,
                                           IdentificationProviderCache identificationProviders,
                                           IProgress<ProgressData> progress = null)
        {
            var progData = new ProgressData(progress);
            IScanSummaryProvider provider = null;

            if (!string.IsNullOrWhiteSpace(dataset.RawFile.Path))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
            }

            progData.StepRange(1);
            progData.Status = "Looking for existing features in the database.";
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId = dataset.DatasetId;

            // The loader may hand back null; treat that as "no existing features"
            // rather than failing on the first use of the list.
            IList<UMCLight> features = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider)
                                       ?? new List<UMCLight>();
            var hasMsFeatures = features.Any(f => f.MsFeatures.Any());

            var msFeatures = new List<MSFeatureLight>();

            if (!hasMsFeatures)
            {
                progData.StepRange(2);
                progData.Status = "Loading MS Feature Data.";
                UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                                           dataset.DatasetName));

                var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
                msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
            }

            progData.StepRange(3);
            progData.Status = "Loading scan summaries.";

            progData.StepRange(100);

            var msnSpectra = new List<MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, provider, ref dataset);

                progData.Status = "Creating LCMS features.";
                features        = CreateLcmsFeatures(dataset,
                                                     msFeatures,
                                                     lcmsFindingOptions,
                                                     lcmsFilteringOptions,
                                                     provider,
                                                     new Progress<ProgressData>(pd => progData.Report(pd.Percent)));

                // Filtering can remove every created feature; Max/Min throw on an empty
                // sequence, so only post-process when something is left.
                if (features.Count > 0)
                {
                    var maxScan   = features.Max(feature => feature.Scan);
                    var minScan   = features.Min(feature => feature.Scan);
                    var scanTimes = dataset.ScanTimes;

                    // Loop-invariant scan-time values used to normalize each feature's NET.
                    // NOTE(review): when all features share one scan the range is zero and the
                    // NET is not a finite number; confirm how callers want that handled.
                    var minScanTime   = scanTimes[minScan];
                    var scanTimeRange = scanTimes[maxScan] - minScanTime;

                    var id = 0;
                    foreach (var feature in features)
                    {
                        feature.Id  = id++;
                        feature.Net = (Convert.ToDouble(scanTimes[feature.Scan]) - minScanTime) / scanTimeRange;
                        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        feature.NetAligned    = feature.Net;
                        feature.GroupId       = datasetId;
                        feature.SpectralCount = feature.MsFeatures.Count;

                        // Tag the child MS features and their MSn spectra with the owning
                        // feature and dataset, and collect the spectra for peptide linking.
                        foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                        {
                            msFeature.UmcId   = feature.Id;
                            msFeature.GroupId = datasetId;
                            msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                            msnSpectra.AddRange(msFeature.MSnSpectra);
                        }
                    }
                }
            }
            else
            {
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id      = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);
                    foreach (var msFeature in msFeatures.Where(f => map.ContainsKey(f.UmcId)))
                    {
                        map[msFeature.UmcId].AddChildFeature(msFeature);
                    }
                }
            }

            // Process the MS/MS data with peptides
            if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
            {
                UpdateStatus("Reading List of Peptides");
                var idProvider  = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
                var peptideList = idProvider.GetAllIdentifications();

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }

            progData.Report(100);

            return features;
        }