Пример #1
0
        /// <summary>
        ///     Reads ProMex features for three datasets through <see cref="PromexFileReader"/>,
        ///     clusters all of them together with <see cref="PromexClusterer"/>, and writes the
        ///     number of clusters holding more than one feature to the console.
        /// </summary>
        /// <remarks>
        ///     NOTE(review): nothing is asserted here, so this test can only fail by throwing.
        /// </remarks>
        public void TestPromexClustering()
        {
            var provider = new ScanSummaryProviderCache();

            // Direct casts instead of `as`: a provider of an unexpected type now fails right here
            // with an InvalidCastException instead of handing a silent null to PromexFileReader.
            var reader1 = (InformedProteomicsReader)provider.GetScanSummaryProvider(pbf1, 0);
            var reader2 = (InformedProteomicsReader)provider.GetScanSummaryProvider(pbf2, 1);
            var reader3 = (InformedProteomicsReader)provider.GetScanSummaryProvider(pbf3, 2);

            // Each file reader receives the same index that was used for its provider lookup.
            var promexFileReader1 = new PromexFileReader(reader1, 0);
            var features1 = promexFileReader1.ReadFile(ms1ft1);

            var promexFileReader2 = new PromexFileReader(reader2, 1);
            var features2 = promexFileReader2.ReadFile(ms1ft2);

            var promexFileReader3 = new PromexFileReader(reader3, 2);
            var features3 = promexFileReader3.ReadFile(ms1ft3);

            // Pool the features of all three datasets into one list for clustering.
            var features = new List<UMCLight>();
            features.AddRange(features1);
            features.AddRange(features2);
            features.AddRange(features3);

            // The clusterer is given the same provider cache the readers came from.
            var clusterer = new PromexClusterer
            {
                Readers = provider,
            };
            var clusters = clusterer.Cluster(features);

            // Report how many clusters contain more than one feature.
            Console.WriteLine(clusters.Count(c => c.Features.Count > 1));
        }
Пример #2
0
        /// <summary>
        ///     Clusters the same two ProMex feature files twice: once through the MultiAlign
        ///     adapters (<see cref="PromexClusterer"/>) and once directly with
        ///     <see cref="LcMsFeatureAlignment"/>. Asserts that both routes give the same total
        ///     number of clusters and the same number of clusters with more than one member.
        /// </summary>
        public void CompareClustering()
        {
            // Cluster using MultiAlign to Promex adapters
            var provider = new ScanSummaryProviderCache();

            // Direct casts instead of `as`: a provider of an unexpected type now fails right here
            // with an InvalidCastException instead of handing a silent null to PromexFileReader.
            var reader1 = (InformedProteomicsReader)provider.GetScanSummaryProvider(pbf1, 0);
            var reader2 = (InformedProteomicsReader)provider.GetScanSummaryProvider(pbf2, 1);

            var promexFileReader1 = new PromexFileReader(reader1, 0);
            var features1 = promexFileReader1.ReadFile(ms1ft1);

            var promexFileReader2 = new PromexFileReader(reader2, 1);
            var features2 = promexFileReader2.ReadFile(ms1ft2);

            var features = new List<UMCLight>();
            features.AddRange(features1);
            features.AddRange(features2);

            var clusterer = new PromexClusterer
            {
                Readers = provider,
            };
            var clusters = clusterer.Cluster(features);

            // Clusters that hold more than one feature.
            var clusterCount = clusters.Count(c => c.UmcList.Count > 1);

            // Cluster using only ProMex
            var lcmsRun1 = PbfLcMsRun.GetLcMsRun(pbf1);
            var lcmsRun2 = PbfLcMsRun.GetLcMsRun(pbf2);

            var aligner = new LcMsFeatureAlignment(new LcMsFeatureAlignComparer(new Tolerance(10, ToleranceUnit.Ppm)));

            var promexFeatures1 = LcMsFeatureAlignment.LoadProMexResult(0, ms1ft1, lcmsRun1);
            aligner.AddDataSet(0, promexFeatures1, lcmsRun1);

            var promexFeatures2 = LcMsFeatureAlignment.LoadProMexResult(1, ms1ft2, lcmsRun2);
            aligner.AddDataSet(1, promexFeatures2, lcmsRun2);

            aligner.AlignFeatures();
            var promexClusters = aligner.GetAlignedFeatures();

            // An aligned entry counts as a multi-member cluster when more than one of its
            // elements is non-null.
            var promexClusterCount = promexClusters.Count(c => c.Count(f => f != null) > 1);

            Assert.AreEqual(clusters.Count, promexClusters.Count);
            Assert.AreEqual(clusterCount, promexClusterCount);
        }
Пример #3
0
        /// <summary>
        ///     Reads MS features from a comma-delimited file, runs <see cref="UmcTreeFeatureFinder"/>
        ///     over them, and asserts on the number of features found.
        /// </summary>
        /// <param name="relativePath">Path of the MS feature file; resolved through <c>GetPath</c>.</param>
        /// <param name="expectedFeatureCount">Exact number of features the finder must return.</param>
        /// <returns>The features produced by the finder.</returns>
        public IEnumerable<UMCLight> TestUmcFeatures(string relativePath, int expectedFeatureCount)
        {
            // Resolve the input file location.
            var absolutePath = GetPath(relativePath);

            // Parse the MS features from the delimited file.
            var msFeatureReader = new MsFeatureLightFileReader { Delimiter = ',' };
            var msFeatures = msFeatureReader.ReadFile(absolutePath);

            // Finder settings and the tolerances wrapped in the finding options.
            var featureFinder = new UmcTreeFeatureFinder { MaximumNet = .005, MaximumScan = 50 };
            var featureTolerances = new FeatureTolerances { Mass = 8, Net = .005 };
            var findingOptions = new LcmsFeatureFindingOptions(featureTolerances);

            // The raw file path is derived from the feature file path by swapping the suffix.
            var rawPath = absolutePath.Replace("_isos.csv", ".raw");

            UpdateStatus("Using raw data to create better features.");

            var cache = new ScanSummaryProviderCache();
            IScanSummaryProvider scanProvider = cache.GetScanSummaryProvider(rawPath, 1);

            var foundFeatures = featureFinder.FindFeatures(msFeatures.ToList(), findingOptions, scanProvider);

            // There must be at least one feature, and exactly as many as the caller expects.
            Assert.Greater(foundFeatures.Count, 0);
            Assert.AreEqual(expectedFeatureCount, foundFeatures.Count);

            return foundFeatures;
        }
Пример #4
0
        /// <summary>
        ///     Reads one ProMex feature file twice: through <see cref="PromexFileReader"/> and
        ///     through <c>LcMsFeatureAlignment.LoadProMexResult</c>. Asserts that both return the
        ///     same number of features and that features at the same position agree on mass, NET,
        ///     scan range and abundance.
        /// </summary>
        public void CompareFileReading()
        {
            // Read using MultiAlign to Promex adapters
            var provider = new ScanSummaryProviderCache();

            // Direct cast instead of `as`: a provider of an unexpected type now fails right here
            // with an InvalidCastException instead of handing a silent null to PromexFileReader.
            var reader1 = (InformedProteomicsReader)provider.GetScanSummaryProvider(pbf1, 0);
            var promexFileReader = new PromexFileReader(reader1, 0);
            var features = promexFileReader.ReadFile(ms1ft1).ToList();

            // Read the same file using ProMex directly.
            var lcmsRun = PbfLcMsRun.GetLcMsRun(pbf1);
            var promexFeatures = LcMsFeatureAlignment.LoadProMexResult(0, ms1ft1, lcmsRun).ToList();

            Assert.AreEqual(features.Count, promexFeatures.Count);

            // Both lists are compared index by index, so they must come back in the same order.
            for (var i = 0; i < features.Count; i++)
            {
                Assert.AreEqual(features[i].MassMonoisotopic, promexFeatures[i].Mass);
                // The Mz vs. RepresentativeMz comparison is currently disabled:
                ////Assert.AreEqual(features[i].Mz, promexFeatures[i].RepresentativeMz);
                Assert.AreEqual(features[i].Net, promexFeatures[i].Net);
                Assert.AreEqual(features[i].ScanStart, promexFeatures[i].MinScanNum);
                Assert.AreEqual(features[i].ScanEnd, promexFeatures[i].MaxScanNum);
                Assert.AreEqual(features[i].Abundance, promexFeatures[i].Abundance);
            }
        }
Пример #5
0
        /// <summary>
        ///     Load a single dataset from the provider.
        /// </summary>
        /// <remarks>
        ///     Existing LC-MS features are requested from <c>UmcLoaderFactory</c> first. If none of the
        ///     returned features carries MS features, MS feature data is loaded from the dataset's
        ///     feature file. When no LC-MS features exist at all, they are created from the filtered
        ///     MS features and each one is given a sequential id, a NET computed from the dataset's
        ///     scan times, and the dataset id as group id. Otherwise any loaded MS features are attached
        ///     to the LC-MS feature whose id matches their <c>UmcId</c>. Finally, if the dataset has a
        ///     sequence file, its identifications are linked to the collected MS/MS spectra.
        /// </remarks>
        /// <param name="dataset">Dataset to load; its raw, feature and sequence file paths and its id are read.</param>
        /// <param name="msFilteringOptions">Options passed to <c>LcmsFeatureFilters.FilterMsFeatures</c>.</param>
        /// <param name="lcmsFindingOptions">Feature finding options passed to <c>CreateLcmsFeatures</c>.</param>
        /// <param name="lcmsFilteringOptions">Feature filtering options passed to <c>CreateLcmsFeatures</c>.</param>
        /// <param name="dataLoadOptions">Source of the isos filter options used when loading MS features.</param>
        /// <param name="providerCache">Supplies the scan summary provider for the dataset's raw file.</param>
        /// <param name="identificationProviders">Supplies the identification provider for the dataset's sequence file.</param>
        /// <param name="progress">Optional progress sink; may be null.</param>
        /// <returns>The LC-MS features that were loaded or created for the dataset.</returns>
        public IList<UMCLight> LoadDataset(DatasetInformation dataset,
                                           MsFeatureFilteringOptions msFilteringOptions,
                                           LcmsFeatureFindingOptions lcmsFindingOptions,
                                           LcmsFeatureFilteringOptions lcmsFilteringOptions,
                                           DataLoadingOptions dataLoadOptions,
                                           ScanSummaryProviderCache providerCache,
                                           IdentificationProviderCache identificationProviders,
                                           IProgress<ProgressData> progress = null)
        {
            var progData = new ProgressData(progress);
            IScanSummaryProvider provider = null;

            // RawFile is null-checked the same way SequenceFile is further down. Without a raw
            // file the provider stays null and is passed on as null.
            if (dataset.RawFile != null && !string.IsNullOrWhiteSpace(dataset.RawFile.Path))
            {
                UpdateStatus("Using raw data to create better features.");
                provider = providerCache.GetScanSummaryProvider(dataset.RawFile.Path, dataset.DatasetId);
            }

            progData.StepRange(1);
            progData.Status = "Looking for existing features in the database.";
            UpdateStatus(string.Format("[{0}] - Loading dataset [{0}] - {1}.", dataset.DatasetId, dataset.DatasetName));

            // Captured before Filter(...) below, which receives the dataset by ref.
            var datasetId     = dataset.DatasetId;
            var features      = UmcLoaderFactory.LoadUmcFeatureData(dataset, Providers.FeatureCache, provider);
            var hasMsFeatures = features.Any(f => f.MsFeatures.Any());

            var msFeatures = new List<MSFeatureLight>();

            // MS feature data is only read from file when no loaded feature already carries any.
            if (!hasMsFeatures)
            {
                progData.StepRange(2);
                progData.Status = "Loading MS Feature Data.";
                UpdateStatus(string.Format("[{0}] Loading MS Feature Data [{0}] - {1}.", dataset.DatasetId,
                                           dataset.DatasetName));

                var isosFilterOptions = dataLoadOptions.GetIsosFilterOptions();
                msFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path, isosFilterOptions);
            }

            // Scan summary loading is disabled; the step and status are still reported.
            progData.StepRange(3);
            progData.Status = "Loading scan summaries.";
            ////var scansInfo = UmcLoaderFactory.LoadScanSummaries(dataset.Scans.Path);
            ////dataset.BuildScanTimes(scansInfo);

            progData.StepRange(100);

            // NOTE(review): spectra are only collected in the "create features" branch below, so
            // the peptide linker receives an empty list when existing features were loaded.
            var msnSpectra = new List<MSSpectra>();

            // If we don't have any features, then we have to create some from the MS features
            // provided to us.
            if (features.Count < 1)
            {
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilteringOptions);
                msFeatures = Filter(msFeatures, provider, ref dataset);

                progData.Status = "Creating LCMS features.";
                features        = CreateLcmsFeatures(dataset,
                                                     msFeatures,
                                                     lcmsFindingOptions,
                                                     lcmsFilteringOptions,
                                                     provider,
                                                     new Progress<ProgressData>(pd => progData.Report(pd.Percent)));

                // Max/Min throw InvalidOperationException on an empty sequence, so the
                // post-processing is skipped when feature creation produced nothing.
                if (features.Count > 0)
                {
                    var maxScan   = features.Max(feature => feature.Scan);
                    var minScan   = features.Min(feature => feature.Scan);
                    var scanTimes = dataset.ScanTimes;

                    // Loop-invariant lookups: time of the earliest feature scan and the full range.
                    var minTime   = scanTimes[minScan];
                    var timeRange = scanTimes[maxScan] - minTime;
                    var id        = 0;

                    foreach (var feature in features)
                    {
                        feature.Id = id++;

                        // NET is the feature's scan time relative to the earliest feature scan, as
                        // a fraction of the full range (scan times, not scan numbers). A zero range
                        // would divide by zero, so NET is pinned to 0 in that case.
                        feature.Net = timeRange == 0
                            ? 0
                            : (Convert.ToDouble(scanTimes[feature.Scan]) - minTime) / timeRange;
                        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        feature.NetAligned    = feature.Net;
                        feature.GroupId       = datasetId;
                        feature.SpectralCount = feature.MsFeatures.Count;

                        // Propagate ids to the child MS features and collect their MS/MS spectra.
                        foreach (var msFeature in feature.MsFeatures.Where(child => child != null))
                        {
                            msFeature.UmcId   = feature.Id;
                            msFeature.GroupId = datasetId;
                            msFeature.MSnSpectra.ForEach(x => x.GroupId = datasetId);
                            msnSpectra.AddRange(msFeature.MSnSpectra);
                        }
                    }
                }
            }
            else
            {
                // Features that are not "existing features" get fresh sequential ids and the
                // dataset id as group id.
                if (!UmcLoaderFactory.AreExistingFeatures(dataset.Features.Path))
                {
                    var i = 0;
                    foreach (var feature in features)
                    {
                        feature.GroupId = datasetId;
                        feature.Id      = i++;
                    }
                }

                // Otherwise, we need to map the MS features to the LCMS Features provided.
                // This would mean that we extracted data from an existing database.
                if (msFeatures.Count > 0)
                {
                    var map = FeatureDataConverters.MapFeature(features);

                    // MS features whose UmcId has no matching LC-MS feature are ignored.
                    foreach (var msFeature in msFeatures.Where(candidate => map.ContainsKey(candidate.UmcId)))
                    {
                        map[msFeature.UmcId].AddChildFeature(msFeature);
                    }
                }
            }

            // MS/MS loading through an ISpectraProvider is disabled here.

            // Process the MS/MS data with peptides
            UpdateStatus("Reading List of Peptides");
            if (dataset.SequenceFile != null && !string.IsNullOrEmpty(dataset.SequenceFile.Path))
            {
                var idProvider  = identificationProviders.GetProvider(dataset.SequenceFile.Path, dataset.DatasetId);
                var peptideList = idProvider.GetAllIdentifications();

                UpdateStatus("Linking MS/MS to any known Peptide/Metabolite Sequences");

                var linker = new PeptideMsMsLinker();
                linker.LinkPeptidesToSpectra(msnSpectra, peptideList);
            }

            progData.Report(100);

            return features;
        }