Exemple #1
0
        /// <summary>
        ///     Loads the MS features stored in <paramref name="featureFile"/> and groups them into
        ///     features with the tree-based feature finder, using a reader opened on
        ///     <paramref name="rawFile"/> as the spectra provider.
        /// </summary>
        /// <param name="rawFile">Path of the raw data file handed to the file reader.</param>
        /// <param name="featureFile">Path of the MS feature file to load.</param>
        /// <returns>The features produced by the feature finder.</returns>
        public List <UMCLight> FindFeatures(string rawFile, string featureFile)
        {
            // The feature file is registered on a DatasetInformation so its path is read back
            // through info.Features, exactly as the rest of the pipeline does.
            var info = new DatasetInformation();
            info.InputFiles.Add(new InputFile
            {
                Path     = featureFile,
                FileType = InputFileType.Features
            });

            var finder     = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var tolerances = new FeatureTolerances
            {
                Mass = 8,
                Net  = .005
            };
            var options = new LcmsFeatureFindingOptions(tolerances);

            // Load and create features
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);

            // Only one reader is needed: the one actually passed to the finder. It is disposed
            // once feature finding completes (previously it was never disposed, while a second,
            // unused reader was opened and disposed instead).
            using (var provider = RawLoaderFactory.CreateFileReader(rawFile))
            {
                provider.AddDataFile(rawFile, 0);
                return finder.FindFeatures(msFeatures, options, provider);
            }
        }
Exemple #2
0
        /// <summary>
        ///     Collects the features for each of the given datasets. A dataset already present in
        ///     <c>featuresByDataset</c> is served from there; otherwise its features are loaded on
        ///     a background task and stored in <c>featuresByDataset</c> before being returned.
        /// </summary>
        /// <param name="datasets">View models whose <c>Dataset</c> identifies what to load.</param>
        /// <returns>A dictionary mapping each dataset to its features.</returns>
        private async Task <Dictionary <DatasetInformation, IList <UMCLight> > > GetFeatures(IEnumerable <DatasetInformationViewModel> datasets)
        {
            var loader = new FeatureLoader
            {
                Providers = this.analysis.DataProviders
            };
            var result = new Dictionary<DatasetInformation, IList<UMCLight>>();

            foreach (var viewModel in datasets)
            {
                IList<UMCLight> loaded;

                if (!this.featuresByDataset.ContainsKey(viewModel.Dataset))
                {
                    // Copy the loop variable so the lambda captures this iteration's item.
                    var current = viewModel;
                    loaded = await Task.Run(
                        () => UmcLoaderFactory.LoadUmcFeatureData(current.Dataset,
                                                                  loader.Providers.FeatureCache));

                    this.featuresByDataset.Add(viewModel.Dataset, loaded);
                }
                else
                {
                    loaded = this.featuresByDataset[viewModel.Dataset];
                }

                result.Add(viewModel.Dataset, loaded);
            }

            return result;
        }
Exemple #3
0
        /// <summary>
        ///     Finds the features of a dataset: loads and filters its MS features, runs the
        ///     supplied feature finder against the dataset's raw file, filters the result, stamps
        ///     the dataset id on every feature and its children, and links peptides to the features.
        /// </summary>
        /// <param name="information">Dataset whose feature, raw and sequence files are read.</param>
        /// <param name="featureFindingOptions">Options handed to the feature finder.</param>
        /// <param name="msFilterOptions">Filter applied to the loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Filter applied to the found features.</param>
        /// <param name="peptideOptions">Supplies the FDR and id score used when linking peptides.</param>
        /// <param name="featureFinder">Finder used to create the features.</param>
        /// <returns>The filtered features, each keeping only MS features that have MSn spectra.</returns>
        private IList <UMCLight> FindFeatures(DatasetInformation information,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder)

        {
            UpdateStatus("Loading baseline features.");
            var loadedMsFeatures = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);
            loadedMsFeatures = LcmsFeatureFilters.FilterMsFeatures(loadedMsFeatures, msFilterOptions);

            // The raw file reader only needs to live for the duration of feature finding and linking.
            using (var rawReader = RawLoaderFactory.CreateFileReader(information.RawFile.Path))
            {
                rawReader.AddDataFile(information.RawFile.Path, 0);

                UpdateStatus("Creating LCMS Features.");
                var found = featureFinder.FindFeatures(loadedMsFeatures, featureFindingOptions, rawReader);
                found = LcmsFeatureFilters.FilterFeatures(found, lcmsFilterOptions, information.ScanTimes);

                var groupId = information.DatasetId;
                foreach (var lcmsFeature in found)
                {
                    lcmsFeature.GroupId = groupId;

                    // MS features that carry at least one MSn spectrum; the rest are dropped below.
                    var withMsMs = new List<MSFeatureLight>();
                    foreach (var child in lcmsFeature.MsFeatures)
                    {
                        child.GroupId = groupId;

                        foreach (var spectrum in child.MSnSpectra)
                        {
                            spectrum.GroupId = groupId;
                            foreach (var peptide in spectrum.Peptides)
                            {
                                peptide.GroupId = groupId;
                            }
                        }

                        if (child.MSnSpectra.Count > 0)
                        {
                            withMsMs.Add(child);
                        }
                    }

                    // Keep the stored MS feature count small: retain only those with MSn spectra.
                    lcmsFeature.MsFeatures.Clear();
                    lcmsFeature.MsFeatures.AddRange(withMsMs);
                }

                LinkPeptidesToFeatures(information.SequenceFile.Path, found, peptideOptions.Fdr, peptideOptions.IdScore);

                DeRegisterProgressNotifier(featureFinder);
                return found;
            }
        }
Exemple #4
0
        /// <summary>
        ///     Loads the features of every selected dataset that reports <c>FeaturesFound</c>,
        ///     stores them in <c>features</c>, and opens a new MS feature window for them.
        /// </summary>
        /// <remarks>
        ///     NOTE(review): an earlier revision wrapped this body in a catch-all that reported the
        ///     feature cache as busy; exceptions now propagate to the caller.
        /// </remarks>
        public async Task PlotMSFeatures()
        {
            features = new Dictionary<DatasetInformation, IList<UMCLight>>();

            // Select only datasets with features.
            var datasetsWithFeatures = selectedDatasets.Where(candidate => candidate.FeaturesFound);

            foreach (var dataset in datasetsWithFeatures)
            {
                var loaded = await Task.Run(
                    () => UmcLoaderFactory.LoadUmcFeatureData(dataset.Features.Path,
                                                              dataset.DatasetId,
                                                              featureCache.Providers.FeatureCache));

                features.Add(dataset, loaded);
            }

            msFeatureWindowFactory.CreateNewWindow(features);
        }
Exemple #5
0
        /// <summary>
        ///     Aligns the features found in two MS feature files with the LCMSWarp aligner, renders
        ///     the alignment heat scores as an image saved to <paramref name="pngPath"/>, and saves
        ///     heatmap and error-histogram SVG files alongside it.
        /// </summary>
        /// <param name="path1">Path of the baseline MS feature file (resolved through <c>GetPath</c>).</param>
        /// <param name="path2">Path of the alignee MS feature file (resolved through <c>GetPath</c>).</param>
        /// <param name="pngPath">Output image path (resolved through <c>GetPath</c>); its directory is created if missing.</param>
        /// <exception cref="DirectoryNotFoundException">The output path has no parent directory.</exception>
        public void TestMsFeatureScatterPlot(string path1, string path2, string pngPath)
        {
            // Convert relative paths to absolute paths
            path1   = GetPath(path1);
            path2   = GetPath(path2);
            pngPath = GetPath(pngPath);

            var fiOutput    = new FileInfo(pngPath);
            var didirectory = fiOutput.Directory;

            if (didirectory == null)
            {
                throw new DirectoryNotFoundException(pngPath);
            }

            if (!didirectory.Exists)
            {
                didirectory.Create();
            }

            var aligner           = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());
            var isosFilterOptions = new DeconToolsIsosFilterOptions();

            var baselineMs = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilterOptions);
            var aligneeMs  = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilterOptions);
            var finder     = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var tolerances = new FeatureTolerances
            {
                FragmentationWindowSize = .5,
                Mass      = 13,
                DriftTime = .3,
                Net       = .01
            };
            var options = new LcmsFeatureFindingOptions(tolerances)
            {
                MaximumNetRange = .002
            };

            // No raw data provider is supplied; features are built from the MS feature files alone.
            var baseline         = finder.FindFeatures(baselineMs, options, null);
            var alignee          = finder.FindFeatures(aligneeMs, options, null);
            var alignmentResults = aligner.Align(baseline, alignee);

            var plotModel1 = new PlotModel
            {
                Subtitle = "Interpolated, cartesian axes",
                Title    = "HeatMapSeries"
            };

            var palette          = OxyPalettes.Hot(200);
            var linearColorAxis1 = new LinearColorAxis
            {
                InvalidNumberColor = OxyColors.Gray,
                Position           = AxisPosition.Right,
                Palette            = palette
            };

            plotModel1.Axes.Add(linearColorAxis1);

            var linearAxis1 = new LinearAxis {
                Position = AxisPosition.Bottom
            };

            plotModel1.Axes.Add(linearAxis1);

            var linearAxis2 = new LinearAxis();

            plotModel1.Axes.Add(linearAxis2);

            var heatMapSeries1 = new HeatMapSeries
            {
                X0       = 0,
                X1       = 1,
                Y0       = 0,
                Y1       = 1,
                FontSize = .2
            };

            // Copy the heat scores into the double[,] the heat map series expects.
            var scores = alignmentResults.HeatScores;
            var width  = scores.GetLength(0);
            var height = scores.GetLength(1);

            heatMapSeries1.Data = new double[width, height];

            var seriesData = heatMapSeries1.Data;

            for (var i = 0; i < width; i++)
            {
                for (var j = 0; j < height; j++)
                {
                    seriesData[i, j] = Convert.ToDouble(scores[i, j]);
                }
            }

            plotModel1.Series.Add(heatMapSeries1);

            // Export the plot to SVG text, then rasterize that SVG to write the image file.
            var svg       = new SvgExporter();
            var svgString = svg.ExportToString(plotModel1);

            var xml = new XmlDocument();

            xml.LoadXml(svgString);
            var x = SvgDocument.Open(xml);

            // The rendered bitmap is disposed as soon as it has been written to disk.
            using (var bmp = x.Draw())
            {
                bmp.Save(pngPath);
            }

            var heatmap       = HeatmapFactory.CreateAlignedHeatmap(alignmentResults.HeatScores, false);
            var netHistogram  = HistogramFactory.CreateHistogram(alignmentResults.NetErrorHistogram, "NET Error", "NET Error");
            var massHistogram = HistogramFactory.CreateHistogram(alignmentResults.MassErrorHistogram, "Mass Error", "Mass Error (ppm)");

            // Companion SVGs share the output file's base name within the same directory.
            var baseName = Path.Combine(didirectory.FullName, Path.GetFileNameWithoutExtension(fiOutput.Name));

            var encoder = new SvgEncoder();

            PlotImageUtility.SaveImage(heatmap, baseName + "_heatmap.svg", encoder);
            PlotImageUtility.SaveImage(netHistogram, baseName + "_netHistogram.svg", encoder);
            PlotImageUtility.SaveImage(massHistogram, baseName + "_massHistogram.svg", encoder);
        }
Exemple #6
0
        /// <summary>
        ///     Aligns the features found in two MS feature files with the LCMSWarp aligner and
        ///     writes the alignment heat scores, plotted as a heat map, to an SVG file.
        /// </summary>
        /// <param name="path1">Path of the baseline MS feature file (resolved through <c>GetPath</c>).</param>
        /// <param name="path2">Path of the alignee MS feature file (resolved through <c>GetPath</c>).</param>
        /// <param name="svgPath">
        ///     Output name, resolved under <c>HEATMAP_RESULTS_FOLDER_BASE</c>; ".svg" is appended.
        /// </param>
        public void TestLcmsWarpAlignment(string path1, string path2, string svgPath)
        {
            // Convert relative paths to absolute paths
            path1   = GetPath(path1);
            path2   = GetPath(path2);
            svgPath = GetPath(HEATMAP_RESULTS_FOLDER_BASE + svgPath);

            var warpAligner = new LcmsWarpFeatureAligner(new LcmsWarpAlignmentOptions());
            var isosFilter  = new DeconToolsIsosFilterOptions();

            var baselineMsFeatures = UmcLoaderFactory.LoadMsFeatureData(path1, isosFilter);
            var aligneeMsFeatures  = UmcLoaderFactory.LoadMsFeatureData(path2, isosFilter);
            var featureFinder      = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var findingOptions = new LcmsFeatureFindingOptions(new FeatureTolerances
            {
                FragmentationWindowSize = .5,
                Mass      = 13,
                DriftTime = .3,
                Net       = .01
            })
            {
                MaximumNetRange = .002
            };

            // No raw data provider is supplied; features are built from the MS feature files alone.
            var baselineFeatures = featureFinder.FindFeatures(baselineMsFeatures, findingOptions, null);
            var aligneeFeatures  = featureFinder.FindFeatures(aligneeMsFeatures, findingOptions, null);
            var alignment        = warpAligner.Align(baselineFeatures, aligneeFeatures);

            var model = new PlotModel
            {
                Subtitle = "Interpolated, cartesian axes",
                Title    = "HeatMapSeries"
            };

            // Axes are added in the same order as before: color axis, bottom axis, default axis.
            model.Axes.Add(new LinearColorAxis
            {
                InvalidNumberColor = OxyColors.Gray,
                Position           = AxisPosition.Right,
                Palette            = OxyPalettes.Hot(200)
            });
            model.Axes.Add(new LinearAxis { Position = AxisPosition.Bottom });
            model.Axes.Add(new LinearAxis());

            // Copy the heat scores into the double[,] the heat map series expects.
            var heatScores = alignment.HeatScores;
            var rowCount   = heatScores.GetLength(0);
            var colCount   = heatScores.GetLength(1);
            var cells      = new double[rowCount, colCount];

            for (var row = 0; row < rowCount; row++)
            {
                for (var col = 0; col < colCount; col++)
                {
                    cells[row, col] = Convert.ToDouble(heatScores[row, col]);
                }
            }

            var heatMap = new HeatMapSeries
            {
                X0       = 0,
                X1       = 1,
                Y0       = 0,
                Y1       = 1,
                FontSize = .2,
                Data     = cells
            };
            model.Series.Add(heatMap);

            var exporter  = new SvgExporter();
            var svgMarkup = exporter.ExportToString(model);

            using (var output = File.CreateText(svgPath + ".svg"))
            {
                output.Write(svgMarkup);
            }
        }
Exemple #7
0
        /// <summary>
        ///     Runs the MultiAlign analysis: finds features for the baseline dataset, then for each
        ///     alignee dataset finds its features, clusters baseline and alignee features together
        ///     before and after alignment, reports the cluster statistics, and writes the alignment
        ///     matches and errors.
        /// </summary>
        /// <param name="baselineDataset">Dataset every alignee is aligned to.</param>
        /// <param name="aligneeDatasets">Datasets aligned to the baseline, one at a time.</param>
        /// <param name="featureFindingOptions">Options handed to the feature finder.</param>
        /// <param name="msFilterOptions">Filter applied to loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Not used by this method.</param>
        /// <param name="peptideOptions">Supplies the FDR and id score used when linking peptides.</param>
        /// <param name="featureFinder">Finder used to create features from MS features.</param>
        /// <param name="aligner">Aligner run on each baseline/alignee pair.</param>
        /// <param name="clusterer">Clusterer run before and after each alignment.</param>
        /// <param name="matchPath">Path passed to <c>SaveMatches</c> for each alignee.</param>
        /// <param name="errorPath">Path passed to <c>WriteErrors</c> for each alignee.</param>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
                                              IEnumerable <DatasetInformation> aligneeDatasets,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
                                              IFeatureAligner <IEnumerable <UMCLight>,
                                                               IEnumerable <UMCLight>,
                                                               AlignmentData> aligner,
                                              IClusterer <UMCLight, UMCClusterLight> clusterer,
                                              string matchPath,
                                              string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set
            using (var rawProviderX = new InformedProteomicsReader())
            {
                rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                                                                  featureFindingOptions,
                                                                  rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = new InformedProteomicsReader())
                    {
                        rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                                                         featureFindingOptions,
                                                                         rawProviderY);
                        LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr,
                                               peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List <UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            // Before alignment the aligned NET and mass mirror the observed values.
                            // (The NET line used to assign feature.Net to itself, which did nothing.)
                            feature.NetAligned = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters     = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider  = providerY;
                        aligner.BaselineSpectraProvider = providerX;

                        UpdateStatus("Aligning data");
                        // Aligner data
                        var data    = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;

                        WriteErrors(errorPath, matches);

                        // create anchor points for LCMSWarp alignment
                        // NOTE(review): these point lists are built but never consumed in this
                        // method; confirm whether a regression step is missing or they can go.
                        var massPoints = new List <RegressionPoint>();
                        var netPoints  = new List <RegressionPoint>();
                        foreach (var match in matches)
                        {
                            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                                                                  match.AnchorPointY.Mz);
                            var netError  = match.AnchorPointX.Net - match.AnchorPointY.Net;
                            var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                            massPoints.Add(massPoint);

                            var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                            netPoints.Add(netPoint);
                        }

                        // Detach every feature from its pre-alignment cluster before re-clustering.
                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId  = -1;
                        }
                        // Then cluster after alignment!
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                                   preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                                   postAlignment.DifferentCluster));

                        // NOTE(review): the same matchPath is used for every alignee dataset.
                        SaveMatches(matchPath, matches);
                    }
                }
            }

            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
Exemple #8
0
        /// <summary>
        ///     Finds the features of the first valid dataset in <paramref name="directory"/>, links
        ///     peptides to them, and writes one tab-separated line (charge, m/z, scan, NET) to
        ///     <paramref name="matchPath"/> for every MS feature that has MS/MS and at least one
        ///     spectrum with a linked peptide.
        /// </summary>
        /// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
        /// <param name="matchPath">Path of the text file to create.</param>
        public void TestPeptideBands(string directory,
                                     string matchPath)
        {
            // Build the search patterns from the supported MultiAlign file types.
            var searchPatterns = new List<string>();
            foreach (var supportedType in DatasetLoader.SupportedFileTypes)
            {
                searchPatterns.Add("*" + supportedType.Extension);
            }

            // Find our datasets; the first one found is the baseline.
            var loader          = new DatasetLoader();
            var foundDatasets   = loader.GetValidDatasets(directory, searchPatterns, SearchOption.TopDirectoryOnly);

            // Options setup
            var preset     = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var tolerances = new FeatureTolerances
            {
                Mass      = preset.Mass,
                Net       = preset.NetTolerance,
                DriftTime = preset.DriftTimeTolerance
            };

            var msFiltering = new MsFeatureFilteringOptions
            {
                MinimumIntensity           = 5000,
                ChargeRange                = new FilterRange(1, 6),
                ShouldUseChargeFilter      = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };

            var findingOptions = new LcmsFeatureFindingOptions(tolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };

            var baseline = foundDatasets[0];

            UpdateStatus("Loading baseline features.");
            var baselineMsFeatures = UmcLoaderFactory.LoadMsFeatureData(baseline.Features.Path);
            baselineMsFeatures = LcmsFeatureFilters.FilterMsFeatures(baselineMsFeatures, msFiltering);

            var treeFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var spectralOptions = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = .05,
                IdScore           = 1e-09,
                MzBinSize         = .5,
                MzTolerance       = .5,
                NetTolerance      = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = .75,
                TopIonPercent     = .8
            };

            var allMsFeatures = new List<MSFeatureLight>();

            // Load the baseline reference set
            using (var rawReader = RawLoaderFactory.CreateFileReader(baseline.RawFile.Path))
            {
                rawReader.AddDataFile(baseline.RawFile.Path, 0);

                UpdateStatus("Creating Baseline LCMS Features.");
                var lcmsFeatures = treeFinder.FindFeatures(baselineMsFeatures, findingOptions, rawReader);

                LinkPeptidesToFeatures(baseline.Sequence.Path, lcmsFeatures, spectralOptions.Fdr, spectralOptions.IdScore);

                // Flatten to MS features, keep those with MS/MS, ordered by m/z.
                foreach (var lcmsFeature in lcmsFeatures)
                {
                    allMsFeatures.AddRange(lcmsFeature.MsFeatures);
                }

                allMsFeatures = allMsFeatures.Where(candidate => candidate.HasMsMs())
                                             .OrderBy(candidate => candidate.Mz)
                                             .ToList();

                // An MS feature is reported once if any of its spectra has at least one peptide.
                var withPeptides = new List<MSFeatureLight>();
                foreach (var candidate in allMsFeatures)
                {
                    if (candidate.MSnSpectra.Any(spectrum => spectrum.Peptides.Any()))
                    {
                        withPeptides.Add(candidate);
                    }
                }

                using (var output = File.CreateText(matchPath))
                {
                    output.WriteLine("Charge\tpmz\tscan\tNET\t");
                    foreach (var row in withPeptides)
                    {
                        output.WriteLine("{0}\t{1}\t{2}\t{3}\t", row.ChargeState, row.Mz, row.Scan, row.Net);
                    }
                }
            }
        }
Exemple #9
0
        /// <summary>
        ///     Load a single dataset from the provider. Features already stored for the dataset
        ///     are returned as loaded; when none exist, LCMS features are created from the
        ///     dataset's MS features, numbered, and given a NET derived from the scan times.
        /// </summary>
        /// <param name="dataset">Dataset to load.</param>
        /// <param name="msFilteringOptions">Filter applied to MS features before LCMS features are created.</param>
        /// <param name="lcmsFindingOptions">Options passed to <c>CreateLcmsFeatures</c>.</param>
        /// <param name="lcmsFilteringOptions">Filtering options passed to <c>CreateLcmsFeatures</c>.</param>
        /// <param name="dataLoadOptions">Supplies the isos filter options used when reading MS feature data.</param>
        /// <param name="providerCache">Source of the scan summary provider when the dataset has a raw file path.</param>
        /// <param name="identificationProviders">Source of the identification provider when the dataset has a sequence file.</param>
        /// <param name="progress">Optional progress sink.</param>
        /// <returns>The LCMS features of the dataset; may be empty when none could be created.</returns>
        public IList <UMCLight> LoadDataset(DatasetInformation dataset,
                                            MsFeatureFilteringOptions msFilteringOptions,
                                            LcmsFeatureFindingOptions lcmsFindingOptions,
                                            LcmsFeatureFilteringOptions lcmsFilteringOptions,
                                            DataLoadingOptions dataLoadOptions,
                                            ScanSummaryProviderCache providerCache,
                                            IdentificationProviderCache identificationProviders,
                                            IProgress <ProgressData> progress = null)
        {
            var progData = new ProgressData(progress);
            IScanSummaryProvider provider = null;

            if (!string.IsNullOrWhiteSpace(dataset.RawFile.Path))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
            }

            progData.StepRange(1);
            progData.Status = "Looking for existing features in the database.";
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));
            var datasetId     = dataset.DatasetId;
            var features      = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);
            var hasMsFeatures = features.Any(f => f.MsFeatures.Any());

            var msFeatures = new List <MSFeatureLight>();

            // MS features are only read from file when the loaded features carry none.
            if (!hasMsFeatures)
            {
                progData.StepRange(2);
                progData.Status = "Loading MS Feature Data.";
                UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                                           dataset.DatasetName));

                var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
                msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
            }

            // NOTE(review): this step only updates the reported status; no scan summaries are
            // loaded here (the loading calls were previously commented out).
            progData.StepRange(3);
            progData.Status = "Loading scan summaries.";

            progData.StepRange(100);

            var msnSpectra = new List <MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, provider, ref dataset);

                progData.Status = "Creating LCMS features.";
                features        = CreateLcmsFeatures(dataset,
                                                     msFeatures,
                                                     lcmsFindingOptions,
                                                     lcmsFilteringOptions,
                                                     provider,
                                                     new Progress <ProgressData>(pd => progData.Report(pd.Percent)));

                // Max/Min throw on an empty sequence, so NET normalization only runs when
                // feature creation actually produced something.
                if (features.Count > 0)
                {
                    var maxScan   = features.Max(feature => feature.Scan);
                    var minScan   = features.Min(feature => feature.Scan);
                    var scanTimes = dataset.ScanTimes;

                    // Loop-invariant: time of the earliest scan and the span up to the latest one.
                    // NOTE(review): when all features share one scan time the span is zero and
                    // the division below is by zero - confirm the intended NET for that case.
                    var firstScanTime = scanTimes[minScan];
                    var scanTimeSpan  = scanTimes[maxScan] - firstScanTime;

                    var id = 0;
                    foreach (var feature in features)
                    {
                        feature.Id  = id++;
                        feature.Net = (Convert.ToDouble(scanTimes[feature.Scan]) - firstScanTime) / scanTimeSpan;
                        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        feature.NetAligned    = feature.Net;
                        feature.GroupId       = datasetId;
                        feature.SpectralCount = feature.MsFeatures.Count;

                        foreach (var msFeature in feature.MsFeatures.Where(msFeature => msFeature != null))
                        {
                            msFeature.UmcId   = feature.Id;
                            msFeature.GroupId = datasetId;
                            msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                            msnSpectra.AddRange(msFeature.MSnSpectra);
                        }
                    }
                }
            }
            else
            {
                // Features that did not come from an existing feature store get fresh ids.
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id      = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);
                    foreach (var msFeature in msFeatures.Where(candidate => map.ContainsKey(candidate.UmcId)))
                    {
                        map[msFeature.UmcId].AddChildFeature(msFeature);
                    }
                }
            }

            // Process the MS/MS data with peptides. The status is reported only when a sequence
            // file is actually read (it used to be emitted twice in that case, and once even
            // when there was nothing to read).
            if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
            {
                UpdateStatus("Reading List of Peptides");
                var idProvider  = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
                var peptideList = idProvider.GetAllIdentifications();

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                // NOTE(review): msnSpectra is only populated when features were created above,
                // so nothing is linked for features loaded from an existing store - confirm.
                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }

            progData.Report(100);

            return(features);
        }