Ejemplo n.º 1
0
        /// <summary>
        ///     Loads the MS features stored in <paramref name="featureFile"/> and runs the tree-based
        ///     feature finder over them, using a spectra provider opened on <paramref name="rawFile"/>.
        /// </summary>
        /// <param name="rawFile">Path of the raw data file; registered with the readers under group id 0.</param>
        /// <param name="featureFile">Path of the MS feature file to load.</param>
        /// <returns>The features returned by the feature finder.</returns>
        public List <UMCLight> FindFeatures(string rawFile, string featureFile)
        {
            List <UMCLight> features;

            // NOTE(review): this reader is opened on the raw file but is never handed to the feature
            // finder (the factory-created provider below is used instead). It is kept so the method's
            // observable behavior does not change — confirm whether it is still needed.
            using (ISpectraProvider raw = new InformedProteomicsReader())
            {
                // Read the raw file summary data...
                raw.AddDataFile(rawFile, 0);

                var info = new DatasetInformation();

                info.InputFiles.Add(new InputFile {
                    Path = featureFile, FileType = InputFileType.Features
                });

                var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

                var tolerances = new FeatureTolerances
                {
                    Mass = 8,
                    Net  = .005
                };
                var options = new LcmsFeatureFindingOptions(tolerances);

                // Load and create features
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(info.Features.Path);
                var provider   = RawLoaderFactory.CreateFileReader(rawFile);
                try
                {
                    provider.AddDataFile(rawFile, 0);
                    features = finder.FindFeatures(msFeatures, options, provider);
                }
                finally
                {
                    // The factory-created provider holds the raw file open; release it once the
                    // features have been computed (previously it was never disposed).
                    var disposableProvider = provider as System.IDisposable;
                    if (disposableProvider != null)
                    {
                        disposableProvider.Dispose();
                    }
                }
            }

            return features;
        }
Ejemplo n.º 2
0
        /// <summary>
        ///     Reads the Promex feature file <c>ms1ft1</c> through a <c>PromexFileReader</c> backed by a
        ///     reader opened on <c>pbf1</c>, and prints the number of features that were read.
        /// </summary>
        public void TestPromexFileReading()
        {
            const int datasetId = 0;

            // Dispose the raw data reader when the test is done; it was previously left open.
            using (var reader = new InformedProteomicsReader(datasetId, pbf1))
            {
                var promexFileReader = new PromexFileReader(reader, datasetId);
                var features         = promexFileReader.ReadFile(ms1ft1);

                // Count inside the using block in case the result is enumerated lazily.
                Console.WriteLine(features.Count());
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        ///     Returns the spectra provider cached for <paramref name="path"/>. On the first request for
        ///     a path an <c>InformedProteomicsReader</c> (group id 0) is created and stored in the cache.
        /// </summary>
        /// <param name="path">Path used both as the cache key and as the reader's data file.</param>
        /// <returns>The cached or newly created provider.</returns>
        private static ISpectraProvider GetProvider(string path)
        {
            // The lookup table itself is created lazily on first use.
            if (mRawDataProviders == null)
            {
                mRawDataProviders = new Dictionary <string, ISpectraProvider>();
            }

            ISpectraProvider cached;
            if (mRawDataProviders.TryGetValue(path, out cached))
            {
                return cached;
            }

            // Cache miss: open a reader for this path and remember it for later calls.
            ISpectraProvider created = new InformedProteomicsReader(0, path);
            mRawDataProviders.Add(path, created);

            return created;
        }
Ejemplo n.º 4
0
        /// <summary>
        ///     Runs a pairwise spectral comparison over every pair of <c>*.mscache</c> files found in
        ///     <paramref name="directory"/> and writes one report per pair.
        /// </summary>
        /// <param name="directory">Directory that is searched for <c>*.mscache</c> files.</param>
        /// <param name="comparerType">Spectral comparison to use; also stored in the options.</param>
        /// <param name="mzBinSize">Stored in <c>SpectralOptions.MzBinSize</c>.</param>
        /// <param name="mzTolerance">Stored in <c>SpectralOptions.MzTolerance</c>.</param>
        /// <param name="netTolerance">Stored in <c>SpectralOptions.NetTolerance</c>.</param>
        /// <param name="similarityScoreCutoff">Stored in <c>SpectralOptions.SimilarityCutoff</c>.</param>
        /// <param name="peptideScore">Stored in <c>SpectralOptions.IdScore</c>.</param>
        /// <param name="peptideFdr">Stored in <c>SpectralOptions.Fdr</c>.</param>
        /// <param name="ionPercent">Stored in <c>SpectralOptions.TopIonPercent</c>.</param>
        /// <param name="numberOfRequiredPeaks">Stored in <c>SpectralOptions.RequiredPeakCount</c>.</param>
        /// <param name="name">Report name prefix; the running comparison index is appended to it.</param>
        public void GenerateFigure4_MetaMatches(string directory,
                                                SpectralComparison comparerType,
                                                double mzBinSize,
                                                double mzTolerance,
                                                double netTolerance,
                                                double similarityScoreCutoff,
                                                double peptideScore,
                                                double peptideFdr,
                                                double ionPercent,
                                                int numberOfRequiredPeaks,
                                                string name)
        {
            AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

            Console.WriteLine(@"Post-Pre Tests For {0}", directory);

            var cacheFiles = Directory.GetFiles(directory, "*.mscache");

            Console.WriteLine(@"Building data cache");
            // Only the cache path is populated here; Msgf is left at its default value.
            var data = cacheFiles.Select(path => new FigureBase.PathCache {
                Cache = path
            }).ToList();

            // The options for the analysis
            var options = new SpectralOptions
            {
                MzBinSize        = mzBinSize,
                MzTolerance      = mzTolerance,
                NetTolerance     = netTolerance,
                SimilarityCutoff = similarityScoreCutoff,
                TopIonPercent    = ionPercent,
                IdScore          = peptideScore,
                ComparerType     = comparerType,
                Fdr = peptideFdr,
                RequiredPeakCount = numberOfRequiredPeaks
            };

            var comparison = 0;

            for (var i = 0; i < data.Count; i++)
            {
                var cachex = data[i];
                // Get the raw path stored in the cache file...
                // then get the dataset object
                var rawPathX = ScanSummaryCache.ReadPath(cachex.Cache);
                var datasetX = new AlignmentDataset(rawPathX, "", cachex.Msgf);

                // create a raw file reader for the datasets
                using (var readerX = new InformedProteomicsReader())
                {
                    // wrap it in the cached object so we can load scan meta-data
                    // NOTE(review): the cache wrappers are populated but the raw readers are what is
                    // passed to MatchDatasets below — confirm this is intentional.
                    var cacheReaderX = new RawLoaderCache(readerX);
                    var cacheDataX   = ScanSummaryCache.ReadCache(cachex.Cache);

                    readerX.AddDataFile(rawPathX, 0);
                    cacheReaderX.AddCache(0, cacheDataX);

                    // Compare dataset i against every later dataset j (each unordered pair once).
                    for (var j = i + 1; j < data.Count; j++)
                    {
                        var cachey = data[j];
                        // Get the raw path stored in the cache file...
                        // then get the dataset object
                        var rawPathY = ScanSummaryCache.ReadPath(cachey.Cache);
                        var datasetY = new AlignmentDataset(rawPathY, "", cachey.Msgf);

                        // create a raw file reader for the datasets
                        using (var readerY = new InformedProteomicsReader())
                        {
                            // Then the writer for creating a report
                            var writer =
                                AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3, name + comparison);
                            comparison++;

                            try
                            {
                                // wrap it in the cached object so we can load scan meta-data
                                var cacheReaderY = new RawLoaderCache(readerY);
                                var cacheDataY   = ScanSummaryCache.ReadCache(cachey.Cache);
                                cacheReaderY.AddCache(0, cacheDataY);
                                readerY.AddDataFile(rawPathY, 0);
                                var names = new List <string> {
                                    cachex.Cache, cachey.Cache
                                };

                                var analysis = MatchDatasets(comparerType,
                                                             readerX,
                                                             readerY,
                                                             options,
                                                             datasetX,
                                                             datasetY,
                                                             names);

                                AlignMatches(analysis, writer);
                            }
                            finally
                            {
                                // Close the report even if matching or alignment throws, so the
                                // writer is not leaked on failure.
                                writer.Close();
                            }
                        }
                    }
                }
            }
        }
Ejemplo n.º 5
0
 /// <summary>
 ///     Creates a Promex file reader that keeps the given raw data reader and dataset id.
 /// </summary>
 /// <param name="reader">Raw data reader stored on this instance.</param>
 /// <param name="datasetId">Dataset identifier stored on this instance.</param>
 public PromexFileReader(InformedProteomicsReader reader, int datasetId)
 {
     this.datasetId = datasetId;
     this.reader    = reader;
 }
Ejemplo n.º 6
0
        /// <summary>
        ///     Opens the raw file at <paramref name="path"/> (group id 0) and returns the spectrum for
        ///     <paramref name="scan"/> by delegating to the provider-based overload.
        /// </summary>
        /// <param name="path">Path of the raw data file to open.</param>
        /// <param name="scan">Scan number to retrieve.</param>
        /// <returns>The spectrum returned by the provider-based <c>GetSpectrum</c> overload.</returns>
        private MSSpectra GetSpectrum(string path, int scan)
        {
            // A new reader is created on every call, so it must be disposed here; previously it leaked.
            using (ISpectraProvider reader = new InformedProteomicsReader(0, path))
            {
                return GetSpectrum(reader, scan, 0);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        ///     Rebuilds the MS features of each LC-MS feature from an extracted ion chromatogram (XIC)
        ///     read from the raw data, then attaches the MS/MS scans reported for the feature's m/z.
        /// </summary>
        /// <param name="features">Features to process. Note: this list is sorted in place by m/z.</param>
        /// <param name="massError">Mass error passed to <c>CreateXicTargetsYield</c> when building targets.</param>
        /// <param name="provider">
        ///     Reader whose <c>LcMsRun</c> supplies chromatograms, elution times and MS/MS scan numbers.
        ///     Its <c>HigherPrecursorChromatogramCacheSize</c> is set to 2000 by this method.
        /// </param>
        /// <param name="refine">When true, every XIC is passed through <c>XicRefiner</c> (or a new default refiner).</param>
        /// <param name="progress">Optional progress sink wrapped in a <c>ProgressData</c>.</param>
        /// <returns>
        ///     The features whose (optionally refined) XIC contains at least three points, in the order
        ///     their targets were produced.
        /// </returns>
        public IEnumerable <UMCLight> CreateXicNew(List <UMCLight> features,
                                                   double massError,
                                                   InformedProteomicsReader provider,
                                                   bool refine = true,
                                                   IProgress <ProgressData> progress = null)
        {
            var progressData = new ProgressData(progress);
            int id = 0, count = 0;
            int msmsFeatureId  = 0;
            var resultFeatures = new List <UMCLight>(features.Count);
            var ipr = provider.LcMsRun;

            ipr.HigherPrecursorChromatogramCacheSize = 2000;

            features.Sort((x, y) => x.Mz.CompareTo(y.Mz));

            // The elution-time range of the run does not depend on the target, so look it up once
            // instead of once per target. It is used to scale each point's elution time into a NET.
            // NOTE(review): if the first and last scan share an elution time, diffEt is 0 and the
            // computed Net values are not finite — confirm whether that can occur.
            var minEt  = ipr.GetElutionTime(ipr.MinLcScan);
            var maxEt  = ipr.GetElutionTime(ipr.MaxLcScan);
            var diffEt = maxEt - minEt;

            // Iterate over XIC targets.
            foreach (var xicTarget in CreateXicTargetsYield(features, massError))
            {
                count++;

                // Read the XIC around the midpoint of the target's scan range.
                var target = xicTarget.StartScan + ((xicTarget.EndScan - xicTarget.StartScan) / 2);
                var xic    = ipr.GetPrecursorExtractedIonChromatogram(xicTarget.LowMz, xicTarget.HighMz, target);

                if (refine)
                {
                    var xicRefiner = this.XicRefiner ?? new XicRefiner();
                    xic = xicRefiner.RefineXic(xic);
                }

                // Targets with fewer than three XIC points are dropped from the result.
                if (xic.Count < 3)
                {
                    continue;
                }

                var feature = xicTarget.Feature;

                // Add xic points as MSFeatures, replacing whatever the feature had before.
                feature.MsFeatures.Clear();
                foreach (var point in xic)
                {
                    feature.AddChildFeature(new MSFeatureLight
                    {
                        ChargeState      = xicTarget.ChargeState,
                        Mz               = xicTarget.Mz,
                        MassMonoisotopic = feature.MassMonoisotopic,
                        Scan             = point.ScanNum,
                        Abundance        = Convert.ToInt64(point.Intensity),
                        Id               = id++,
                        DriftTime        = feature.DriftTime,
                        Net              = (ipr.GetElutionTime(point.ScanNum) - minEt) / diffEt,
                        GroupId          = feature.GroupId
                    });
                }

                // Associate MS/MS information. j is shared across MS features so that each MS/MS
                // scan is considered at most once while walking both sequences forward.
                var ms2Scans   = ipr.GetFragmentationSpectraScanNums(feature.Mz).ToArray();
                var msFeatures = feature.MsFeatures;
                var lastIndex  = msFeatures.Count - 1;
                int j          = 0;
                for (int i = 0; i < msFeatures.Count; i++)
                {
                    var msFeature = msFeatures[i];

                    for (; j < ms2Scans.Length; j++)
                    {
                        // Scan below UMC feature scan range.
                        // NOTE(review): this break leaves j unchanged, so if the MS features are in
                        // ascending scan order an MS/MS scan that precedes the first MS feature
                        // appears to stop all later association for this feature. It may have been
                        // meant to skip that scan (continue) — confirm before changing.
                        if (ms2Scans[j] < msFeature.Scan)
                        {
                            break;
                        }

                        // Haven't reached the last ms2 scan and ms2 scan is larger than next feature, could be associated with next feature
                        if (i < lastIndex && ms2Scans[j] > msFeatures[i + 1].Scan)
                        {
                            break;
                        }

                        // We're on the last MSFeature - is the MS/MS scan actually for this feature?
                        if (i == lastIndex &&
                            ipr.GetPrevScanNum(ms2Scans[j], 1) != msFeature.Scan)
                        {
                            continue;
                        }

                        // Otherwise this is a MS/MS we want to add!
                        var spectraData = new MSSpectra
                        {
                            Id           = msmsFeatureId++,
                            ScanMetaData = new ScanSummary
                            {
                                MsLevel     = 2,
                                Scan        = ms2Scans[j],
                                PrecursorMz = msFeature.Mz,
                            },
                            CollisionType = CollisionType.None,
                            Scan          = ms2Scans[j],
                            PrecursorMz   = msFeature.Mz
                        };
                        msFeature.MSnSpectra.Add(spectraData);
                    }
                }

                resultFeatures.Add(feature);

                // count is already 1-based here, so the final item corresponds to features.Count
                // (the previous "Count - 1" check fired one item early and usually missed the last).
                if (count % 100 == 0 || count == features.Count)
                {
                    progressData.Report(count, features.Count);
                }
            }

            return resultFeatures;
        }
Ejemplo n.º 8
0
        /// <summary>
        ///     Example walk-through: loads every cluster from the database, reconstructs each one, and
        ///     visits the MS/MS spectra metadata of its features. Reading the raw spectra is switched
        ///     off by default through the local <c>shouldGetRawData</c> flag.
        /// </summary>
        /// <param name="databasePath">Path of the database to open.</param>
        /// <param name="indexDatabase">When true, clusters and features are indexed before the database is opened.</param>
        public void CreateUMCClusterLight(string databasePath, bool indexDatabase)
        {
            // If the database is not indexed then do so...but before the session to the db is opened.
            if (indexDatabase)
            {
                DatabaseIndexer.IndexClusters(databasePath);
                DatabaseIndexer.IndexFeatures(databasePath);
            }

            // This is a factory based method that creates a set of data access providers used throughout MultiAlign
            var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, false);

            var clusters              = providers.ClusterCache.FindAll();
            var shouldGetMsFeatures   = true;
            var shouldGetMsMsFeatures = true;
            var shouldGetRawData      = false;

            // This gets all of the dataset information and maps it by dataset id...needed only if
            // you want the raw data.
            var datasets   = providers.DatasetCache.FindAll();
            var datasetMap = new Dictionary <int, DatasetInformation>();

            datasets.ForEach(x => datasetMap.Add(x.DatasetId, x));

            foreach (var cluster in clusters)
            {
                cluster.ReconstructUMCCluster(providers,
                                              true,
                                              false,
                                              shouldGetMsFeatures,
                                              shouldGetMsMsFeatures);

                foreach (var feature in cluster.Features)
                {
                    foreach (var msFeature in feature.Features)
                    {
                        foreach (var spectrumMetaData in msFeature.MSnSpectra)
                        {
                            // then you can do stuff with the ms/ms spectra
                            // If you had the path to the raw file, you could create a reader for you to extract the MS/MS spectra
                            if (!shouldGetRawData)
                            {
                                continue;
                            }

                            // Skip spectra whose dataset is unknown or has no raw file attached.
                            DatasetInformation info;
                            if (!datasetMap.TryGetValue(spectrumMetaData.GroupId, out info) || info.RawFile == null)
                            {
                                continue;
                            }

                            // This might seem kind of klunky, but it's called a bridge, this way I can access
                            // MS/MS spectra from PNNLOmics without having to reference any of the Thermo DLL's
                            // Nor support file reading capability.  This is also nice because I don't have to load
                            // several MS/MS spectra when analyzing large datasets for my spectral clustering work.
                            //
                            // The reader is created per spectrum, so dispose it as soon as the spectrum
                            // has been read; previously it was never released.
                            using (var rawReader = new InformedProteomicsReader(spectrumMetaData.GroupId, info.RawFile.Path))
                            {
                                // Then grab the actual spectrum...
                                ScanSummary summary;
                                var spectrum = rawReader.GetRawSpectra(spectrumMetaData.Scan, 2, out summary);

                                // Then do what you want with the spectrum here.
                            }
                        }
                    }
                }
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        ///     Runs the MultiAlign analysis: finds features for the baseline dataset, then for every
        ///     alignee dataset finds its features, clusters baseline and alignee features together,
        ///     aligns the alignee to the baseline, clusters again, and reports the cluster statistics
        ///     from both clustering passes.
        /// </summary>
        /// <param name="baselineDataset">Dataset used as the alignment reference.</param>
        /// <param name="aligneeDatasets">Datasets that are each aligned against the baseline.</param>
        /// <param name="featureFindingOptions">Options passed to the feature finder.</param>
        /// <param name="msFilterOptions">Filter applied to the loaded MS features of every dataset.</param>
        /// <param name="lcmsFilterOptions">Not referenced in the body of this method.</param>
        /// <param name="peptideOptions">Supplies the FDR and id score used when linking peptides to features.</param>
        /// <param name="featureFinder">Feature finder used for both baseline and alignee datasets.</param>
        /// <param name="aligner">Aligner; its baseline/alignee spectra providers are set for every alignee.</param>
        /// <param name="clusterer">Clusterer run before and after the alignment call.</param>
        /// <param name="matchPath">Path passed to <c>SaveMatches</c> for every alignee.</param>
        /// <param name="errorPath">Path passed to <c>WriteErrors</c> for every alignee.</param>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
                                              IEnumerable <DatasetInformation> aligneeDatasets,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
                                              IFeatureAligner <IEnumerable <UMCLight>,
                                                               IEnumerable <UMCLight>,
                                                               AlignmentData> aligner,
                                              IClusterer <UMCLight, UMCClusterLight> clusterer,
                                              string matchPath,
                                              string errorPath)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);

            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Load the baseline reference set; the baseline reader stays open for all alignees.
            using (var rawProviderX = new InformedProteomicsReader())
            {
                rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                                                                  featureFindingOptions,
                                                                  rawProviderX);
                LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                // Then load the alignee dataset
                foreach (var dataset in aligneeDatasets)
                {
                    var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                    aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                    using (var rawProviderY = new InformedProteomicsReader())
                    {
                        rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                        UpdateStatus("Finding alignee features");
                        var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                                                         featureFindingOptions,
                                                                         rawProviderY);
                        LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr,
                                               peptideOptions.IdScore);

                        var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                        // cluster before we do anything else....
                        var allFeatures = new List <UMCLight>();
                        allFeatures.AddRange(baselineFeatures);
                        allFeatures.AddRange(aligneeFeatures);
                        foreach (var feature in allFeatures)
                        {
                            // NOTE(review): this assigns Net to itself; it may have been meant to seed
                            // an aligned NET value the way the mass line below seeds the aligned mass
                            // — confirm against the feature type.
                            feature.Net = feature.Net;
                            feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                        }

                        // This tells us the differences before we align.
                        var clusters     = clusterer.Cluster(allFeatures);
                        var preAlignment = AnalyzeClusters(clusters);

                        aligner.AligneeSpectraProvider  = providerY;
                        aligner.BaselineSpectraProvider = providerX;


                        UpdateStatus("Aligning data");
                        // Aligner data
                        var data    = aligner.Align(baselineFeatures, aligneeFeatures);
                        var matches = data.Matches;


                        // NOTE(review): the same errorPath is used for every alignee dataset — verify
                        // whether WriteErrors appends or overwrites.
                        WriteErrors(errorPath, matches);

                        // create anchor points for LCMSWarp alignment
                        // NOTE(review): massPoints and netPoints are filled below but are not read
                        // again anywhere in this method.
                        var massPoints = new List <RegressionPoint>();
                        var netPoints  = new List <RegressionPoint>();
                        foreach (var match in matches)
                        {
                            var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                                                                  match.AnchorPointY.Mz);
                            var netError  = match.AnchorPointX.Net - match.AnchorPointY.Net;
                            var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                            massPoints.Add(massPoint);

                            var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                            netPoints.Add(netPoint);
                        }


                        // Detach every feature from its pre-alignment cluster before clustering again.
                        foreach (var feature in allFeatures)
                        {
                            feature.UmcCluster = null;
                            feature.ClusterId  = -1;
                        }
                        // Then cluster after alignment!
                        // NOTE(review): nothing visible here writes aligned values back onto the
                        // features; presumably aligner.Align updates them in place — confirm.
                        UpdateStatus("clustering data");
                        clusters = clusterer.Cluster(allFeatures);
                        var postAlignment = AnalyzeClusters(clusters);

                        // Tab-separated summary of the two clustering passes.
                        UpdateStatus("Note\tSame\tDifferent");
                        UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                                   preAlignment.DifferentCluster));
                        UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                                   postAlignment.DifferentCluster));

                        // NOTE(review): the same matchPath is used for every alignee dataset — verify
                        // whether SaveMatches appends or overwrites.
                        SaveMatches(matchPath, matches);
                    }
                }
            }

            // Only reached when the analysis completes without throwing.
            DeRegisterProgressNotifier(aligner);
            DeRegisterProgressNotifier(featureFinder);
            DeRegisterProgressNotifier(clusterer);
        }
Ejemplo n.º 10
0
        /// <summary>
        ///     Aligns every consecutive pair of datasets stored in a database (dataset i as baseline,
        ///     dataset i + 1 as alignee) and hands each pair to <c>AlignDatasets</c> together with
        ///     per-pair match and error file paths.
        /// </summary>
        /// <param name="relativeDatabasePath">Database location, resolved through <c>GetPath</c>.</param>
        /// <param name="relativeName">Output location, resolved through <c>GetOutputPath</c>; created if missing.</param>
        /// <param name="name">Prefix of the generated match and error file names.</param>
        /// <param name="alignmentType">Alignment algorithm requested from <c>FeatureAlignerFactory</c>.</param>
        /// <param name="clusterType">Clustering algorithm requested from <c>ClusterFactory</c>.</param>
        public void GenerateClusterAlignmentStatistics(string relativeDatabasePath,
                                                       string relativeName,
                                                       string name,
                                                       FeatureAlignmentType alignmentType,
                                                       LcmsFeatureClusteringAlgorithmType clusterType)
        {
            var dbLocation      = GetPath(relativeDatabasePath);
            var outputDirectory = GetOutputPath(relativeName);

            // Make sure the report directory is there before anything is written to it.
            if (!Directory.Exists(outputDirectory))
            {
                Directory.CreateDirectory(outputDirectory);
            }

            // Open the database through the shared data access providers.
            var providers = DataAccessFactory.CreateDataAccessProviders(dbLocation, false);

            // Alignment and spectral comparison settings.
            var alignmentOptions = new AlignmentOptions();
            var spectralOptions  = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = .01,
                IdScore           = 1e-09,
                MzBinSize         = .5,
                MzTolerance       = .5,
                NetTolerance      = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = .75,
                TopIonPercent     = .8
            };

            // Clustering tolerances start from the LTQ-Orbitrap preset, with the mass value raised by 6.
            var preset     = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var tolerances = new FeatureTolerances
            {
                Mass      = preset.Mass + 6,
                Net       = preset.NetTolerance,
                DriftTime = preset.DriftTimeTolerance
            };

            UpdateStatus("Retrieving all datasets for test.");
            var datasets = providers.DatasetCache.FindAll();

            // Build the algorithms and hook them up to progress reporting.
            var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
                                                                     alignmentOptions.LCMSWarpOptions,
                                                                     spectralOptions);
            var clusterer = ClusterFactory.Create(clusterType);
            clusterer.Parameters = new FeatureClusterParameters <UMCLight>
            {
                Tolerances = tolerances
            };

            RegisterProgressNotifier(aligner);
            RegisterProgressNotifier(clusterer);

            // Walk the consecutive pairs (0,1), (1,2), ... of the dataset list.
            for (var pair = 0; pair + 1 < datasets.Count; pair++)
            {
                var matchPath = Path.Combine(outputDirectory, string.Format("{0}-{1}-matches.txt", name, pair));
                var errorPath = Path.Combine(outputDirectory, string.Format("{0}-{1}-errors.txt", name, pair));

                var aligneeDataset  = datasets[pair + 1];
                var baselineDataset = datasets[pair];

                // Raw reader for the baseline dataset.
                using (var baselineReader = new InformedProteomicsReader())
                {
                    baselineReader.AddDataFile(baselineDataset.RawFile.Path, 0);

                    // Raw reader for the alignee dataset.
                    using (var aligneeReader = new InformedProteomicsReader())
                    {
                        aligneeReader.AddDataFile(aligneeDataset.RawFile.Path, 0);

                        var baselineFeatures = RetrieveFeatures(baselineDataset.DatasetId, providers);
                        var aligneeFeatures  = RetrieveFeatures(aligneeDataset.DatasetId, providers);
                        var baselineSpectra  = new CachedFeatureSpectraProvider(baselineReader, baselineFeatures);
                        var aligneeSpectra   = new CachedFeatureSpectraProvider(aligneeReader, aligneeFeatures);

                        AlignDatasets(baselineFeatures,
                                      aligneeFeatures,
                                      baselineSpectra,
                                      aligneeSpectra,
                                      aligner,
                                      clusterer,
                                      matchPath,
                                      errorPath);
                    }
                }
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        ///     Runs a pairwise spectral comparison over every pair of datasets in
        ///     <paramref name="directory"/> that has both an MSGF result file
        ///     (<c>*_msgfdb_fht.txt</c>) and a matching <c>*.mscache</c> file, writing one report per pair.
        /// </summary>
        /// <param name="directory">Directory that is searched for cache and MSGF files.</param>
        /// <param name="comparerType">Spectral comparison to use; also stored in the options.</param>
        /// <param name="mzBinSize">Stored in <c>SpectralOptions.MzBinSize</c>.</param>
        /// <param name="mzTolerance">Stored in <c>SpectralOptions.MzTolerance</c>.</param>
        /// <param name="netTolerance">Stored in <c>SpectralOptions.NetTolerance</c>.</param>
        /// <param name="similarityScoreCutoff">Stored in <c>SpectralOptions.SimilarityCutoff</c>.</param>
        /// <param name="peptideScore">Stored in <c>SpectralOptions.IdScore</c>.</param>
        /// <param name="peptideFdr">Stored in <c>SpectralOptions.Fdr</c>.</param>
        /// <param name="ionPercent">Stored in <c>SpectralOptions.TopIonPercent</c>.</param>
        /// <param name="numberOfRequiredPeaks">Stored in <c>SpectralOptions.RequiredPeakCount</c>.</param>
        public void GenerateFigure3_Matches(string directory,
                                            SpectralComparison comparerType,
                                            double mzBinSize,
                                            double mzTolerance,
                                            double netTolerance,
                                            double similarityScoreCutoff,
                                            double peptideScore,
                                            double peptideFdr,
                                            double ionPercent,
                                            int numberOfRequiredPeaks)
        {
            // NOTE(review): the base path says "figure4" although this method generates figure 3 —
            // confirm the intended output location.
            AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

            Console.WriteLine(@"Post-Pre Tests For {0}", directory);

            var cacheFiles = Directory.GetFiles(directory, "*.mscache");
            var msgfFiles  = Directory.GetFiles(directory, "*_msgfdb_fht.txt");

            Console.WriteLine(@"Building data cache");
            // Lower-cased cache paths, used only for key lookups (the values are always null).
            var map = cacheFiles.ToDictionary <string, string, FigureBase.PathCache>(path => path.ToLower(), path => null);

            // Keep only the MSGF files whose derived cache path exists in the lookup above.
            // NOTE(review): Features is built from the same arguments as Cache, so both hold the
            // same path — confirm whether a different file was intended.
            var data = (from path in msgfFiles
                        let name = path.ToLower().Replace("_msgfdb_fht.txt", ".mscache")
                        let newName = Path.Combine(directory, name)
                        let features = Path.Combine(directory, name)
                        where map.ContainsKey(newName)
                        select new FigureBase.PathCache {
                            Cache = newName, Msgf = path, Features = features
                        }).ToList();


            // The options for the analysis
            var options = new SpectralOptions
            {
                MzBinSize        = mzBinSize,
                MzTolerance      = mzTolerance,
                NetTolerance     = netTolerance,
                SimilarityCutoff = similarityScoreCutoff,
                TopIonPercent    = ionPercent,
                IdScore          = peptideScore,
                ComparerType     = comparerType,
                Fdr = peptideFdr,
                RequiredPeakCount = numberOfRequiredPeaks
            };

            Console.WriteLine(@"{0}", data.Count);

            var comparison = 0;

            for (var i = 0; i < data.Count; i++)
            {
                var cachex = data[i];
                // Get the raw path stored in the cache file...
                // then get the dataset object
                var rawPathX = ScanSummaryCache.ReadPath(cachex.Cache);
                var datasetX = new AlignmentDataset(rawPathX, "", cachex.Msgf);

                // create a raw file reader for the datasets
                using (var readerX = new InformedProteomicsReader())
                {
                    // wrap it in the cached object so we can load scan meta-data
                    var cacheReaderX = new RawLoaderCache(readerX);
                    var cacheDataX   = ScanSummaryCache.ReadCache(cachex.Cache);

                    readerX.AddDataFile(rawPathX, 0);
                    cacheReaderX.AddCache(0, cacheDataX);

                    // Compare dataset i against every later dataset j (each unordered pair once).
                    for (var j = i + 1; j < data.Count; j++)
                    {
                        // Then the writer for creating a report
                        var writer =
                            AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3,
                                                                  "results-figure3-largeScale" + comparison);
                        comparison++;

                        try
                        {
                            var cachey = data[j];
                            // Get the raw path stored in the cache file...
                            // then get the dataset object
                            var rawPathY = ScanSummaryCache.ReadPath(cachey.Cache);
                            var datasetY = new AlignmentDataset(rawPathY, "", cachey.Msgf);

                            // create a raw file reader for the datasets
                            using (var readerY = new InformedProteomicsReader())
                            {
                                // wrap it in the cached object so we can load scan meta-data
                                var cacheReaderY = new RawLoaderCache(readerY);
                                var cacheDataY   = ScanSummaryCache.ReadCache(cachey.Cache);
                                cacheReaderY.AddCache(0, cacheDataY);
                                readerY.AddDataFile(rawPathY, 0);
                                var names = new List <string> {
                                    cachex.Cache, cachey.Cache
                                };

                                // Write the results
                                var analysis = MatchDatasets(comparerType,
                                                             cacheReaderX,
                                                             cacheReaderY,
                                                             options,
                                                             datasetX,
                                                             datasetY,
                                                             names);

                                AlignMatches(analysis, writer);
                            }
                        }
                        finally
                        {
                            // The report writer was previously never closed; close it for every
                            // pair, including when matching or alignment throws.
                            writer.Close();
                        }
                    }
                }
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        ///     Loads the raw MS/MS spectra referenced by every feature of a cluster and writes them to a
        ///     single output file through the supplied writer.
        /// </summary>
        /// <param name="cluster">Cluster whose features (and their MS features) are exported.</param>
        /// <param name="path">Output path passed to the writer on every write/append call.</param>
        /// <param name="datasets">
        ///     Datasets used to resolve a feature's group ID to a raw file path. Features whose dataset is
        ///     unknown, has no raw file information, or whose raw file does not exist are skipped.
        /// </param>
        /// <param name="writer">Writer that receives the spectra of each MS feature.</param>
        public static void ExportMsMs(this UMCClusterLight cluster, string path, List <DatasetInformation> datasets,
                                      IMsMsSpectraWriter writer)
        {
            // Index the datasets by ID so each feature can find its source dataset directly.
            var information = new Dictionary <int, DatasetInformation>();
            datasets.ForEach(x => information.Add(x.DatasetId, x));

            // One raw reader per dataset (keyed by group ID), created only for datasets the cluster uses.
            var readers = new Dictionary <int, ISpectraProvider>();

            try
            {
                // Each feature may come from a different raw data source, and the raw data is not held
                // in memory, so open a reader for every dataset that contributes to this cluster.
                foreach (var feature in cluster.Features)
                {
                    if (readers.ContainsKey(feature.GroupId))
                    {
                        continue;
                    }

                    DatasetInformation singleInfo;
                    if (!information.TryGetValue(feature.GroupId, out singleInfo))
                    {
                        continue;
                    }

                    if (singleInfo.Raw == null || singleInfo.RawPath == null)
                    {
                        continue;
                    }

                    // Make sure that we have a file.
                    if (!File.Exists(singleInfo.RawPath))
                    {
                        continue;
                    }

                    // Register the reader before opening the file so that it is still disposed
                    // by the finally block if AddDataFile throws.
                    var provider = new InformedProteomicsReader();
                    readers.Add(feature.GroupId, provider);
                    provider.AddDataFile(singleInfo.RawPath, feature.GroupId);
                }

                // The first write goes through Write (so an existing file is overwritten);
                // every subsequent write must go through Append so earlier spectra are kept.
                var firstWrite = true;

                foreach (var feature in cluster.Features)
                {
                    ISpectraProvider provider;
                    if (!readers.TryGetValue(feature.GroupId, out provider))
                    {
                        continue;
                    }

                    foreach (var msFeature in feature.MsFeatures)
                    {
                        // Fill in the peak data and scan meta-data for each spectrum from the raw file.
                        foreach (var spectrum in msFeature.MSnSpectra)
                        {
                            ScanSummary summary;
                            var         data = provider.GetRawSpectra(spectrum.Scan, spectrum.GroupId, out summary);
                            spectrum.Peaks        = data;
                            spectrum.ScanMetaData = summary;
                        }

                        if (firstWrite)
                        {
                            writer.Write(path, msFeature.MSnSpectra);
                            firstWrite = false;
                        }
                        else
                        {
                            writer.Append(path, msFeature.MSnSpectra);
                        }
                    }
                }
            }
            finally
            {
                // The readers are created here and never handed out, so release them here as well.
                foreach (var reader in readers.Values)
                {
                    reader.Dispose();
                }
            }
        }