Beispiel #1
0
        ///// <summary>
        ///// Matches two datasets based on spectral similarity.
        ///// </summary>
        ///// <param name="readerX"></param>
        ///// <param name="readerY"></param>
        ///// <param name="options"></param>
        ///// <returns></returns>
        //public  MatchDatasets(ISpectraProvider readerX,
        //                                                        ISpectraProvider readerY,
        //                                                        SpectralOptions options)
        //{
        //    // This helps us compare various comparison calculation methods
        //    var comparer    = SpectralComparerFactory.CreateSpectraComparer(options.ComparerType);

        //    // This guy filters the spectra, so that we only keep the N most intense ions for comparison
        //    var filter      = SpectrumFilterFactory.CreateFilter(SpectraFilters.TopPercent);

        //    // Here we find all the matches
        //    var finder      = new SpectralAnchorPointFinderOriginal();
        //    return finder.FindAnchorPoints(readerX,
        //                                          readerY,
        //                                          comparer,
        //                                          filter,
        //                                          options);
        //}

        /// <summary>
        /// Validates anchor point matches against peptide identifications.
        /// Nothing is validated unless both peptide collections contain at least one entry.
        /// </summary>
        /// <param name="matches">Anchor point matches handed to the peptide matcher.</param>
        /// <param name="peptidesA">Peptides whose best-scan map is passed as the X map.</param>
        /// <param name="peptidesB">Peptides whose best-scan map is passed as the Y map.</param>
        /// <param name="options">Supplies the IdScore and Fdr cutoffs and is forwarded to the matcher.</param>
        public void ValidateMatches(IEnumerable <SpectralAnchorPointMatch> matches,
                                    IEnumerable <Peptide> peptidesA,
                                    IEnumerable <Peptide> peptidesB,
                                    SpectralOptions options)
        {
            // Materialize each input once so that no sequence is enumerated twice.
            IEnumerable <SpectralAnchorPointMatch> materializedMatches = matches as SpectralAnchorPointMatch[] ?? matches.ToArray();
            var candidatesB = peptidesB as Peptide[] ?? peptidesB.ToArray();
            var candidatesA = peptidesA as Peptide[] ?? peptidesA.ToArray();

            // Without peptides on both sides there is nothing to validate against.
            if (!candidatesA.Any() || !candidatesB.Any())
            {
                return;
            }

            // Keep only the identifications that pass the score / FDR cutoff.
            IEnumerable <Peptide> passingA = candidatesA
                .Where(x => PeptideUtility.PassesCutoff(x, options.IdScore, options.Fdr))
                .ToList();
            IEnumerable <Peptide> passingB = candidatesB
                .Where(x => PeptideUtility.PassesCutoff(x, options.IdScore, options.Fdr))
                .ToList();

            var bestScanX = PeptideUtility.MapWithBestScan(passingA);
            var bestScanY = PeptideUtility.MapWithBestScan(passingB);

            // The matcher flags every anchor point match based on the mapped peptides.
            var peptideMatcher = new PeptideAnchorPointMatcher();
            peptideMatcher.Match(materializedMatches, bestScanX, bestScanY, options);
        }
Beispiel #2
0
        /// <summary>
        /// Creates the database aligner that corresponds to the requested alignment type.
        /// </summary>
        /// <param name="type">Alignment algorithm to create an aligner for.</param>
        /// <param name="options">Options passed to the LCMSWarp aligner.</param>
        /// <param name="spectralOptions">Options assigned to the spectral aligner.</param>
        /// <returns>The aligner for <paramref name="type"/>, or null when the type is not one of the handled cases.</returns>
        public static IFeatureAligner <MassTagDatabase, IEnumerable <UMCLight>, AlignmentData> CreateDatabaseAligner(
            FeatureAlignmentType type,
            LcmsWarpAlignmentOptions options,
            SpectralOptions spectralOptions)
        {
            switch (type)
            {
            case FeatureAlignmentType.LCMS_WARP:
                return new LcmsWarpFeatureAligner(options);

            case FeatureAlignmentType.DIRECT_IMS_INFUSION:
                return new DummyAlignment();

            case FeatureAlignmentType.SPECTRAL_ALIGNMENT:
                return new SpectralAlignerWrapper {
                    Options = spectralOptions
                };

            default:
                // Unhandled alignment types yield no aligner.
                return null;
            }
        }
Beispiel #3
0
        /// <summary>
        ///  Loads and filters the MS features of a dataset, builds LCMS features from them,
        ///  stamps the dataset ID on everything found and links peptides to the features.
        /// </summary>
        /// <param name="information">Dataset whose feature, raw and sequence file paths, scan times and dataset ID are used.</param>
        /// <param name="featureFindingOptions">Options passed to the feature finder.</param>
        /// <param name="msFilterOptions">Options used to filter the loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Options used to filter the LCMS features that were found.</param>
        /// <param name="peptideOptions">Supplies the Fdr and IdScore used when linking peptides.</param>
        /// <param name="featureFinder">Algorithm that builds the LCMS features.</param>
        /// <returns>The filtered LCMS features, each retaining only MS features that have MSn spectra.</returns>
        private IList <UMCLight> FindFeatures(DatasetInformation information,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder)
        {
            UpdateStatus("Loading baseline features.");
            var loadedMsFeatures = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);
            loadedMsFeatures = LcmsFeatureFilters.FilterMsFeatures(loadedMsFeatures, msFilterOptions);

            // The raw file provider is only needed while the features are being built.
            using (var rawProvider = RawLoaderFactory.CreateFileReader(information.RawFile.Path))
            {
                rawProvider.AddDataFile(information.RawFile.Path, 0);

                UpdateStatus("Creating LCMS Features.");
                var lcmsFeatures = featureFinder.FindFeatures(loadedMsFeatures, featureFindingOptions, rawProvider);
                lcmsFeatures = LcmsFeatureFilters.FilterFeatures(lcmsFeatures, lcmsFilterOptions, information.ScanTimes);

                var groupId = information.DatasetId;
                foreach (var lcmsFeature in lcmsFeatures)
                {
                    lcmsFeature.GroupId = groupId;

                    // Collects the MS features that carry at least one MSn spectrum.
                    var msFeaturesWithSpectra = new List <MSFeatureLight>();
                    foreach (var child in lcmsFeature.MsFeatures)
                    {
                        child.GroupId = groupId;
                        foreach (var spectrum in child.MSnSpectra)
                        {
                            spectrum.GroupId = groupId;
                            foreach (var identification in spectrum.Peptides)
                            {
                                identification.GroupId = groupId;
                            }
                        }

                        if (child.MSnSpectra.Count <= 0)
                        {
                            continue;
                        }
                        msFeaturesWithSpectra.Add(child);
                    }

                    // Only the MS features with spectra are kept, so that the database
                    // does not fill up with a ton of MS features.
                    lcmsFeature.MsFeatures.Clear();
                    lcmsFeature.MsFeatures.AddRange(msFeaturesWithSpectra);
                }

                LinkPeptidesToFeatures(information.SequenceFile.Path,
                                       lcmsFeatures,
                                       peptideOptions.Fdr,
                                       peptideOptions.IdScore);

                // NOTE(review): this call is not reached when an earlier step throws.
                DeRegisterProgressNotifier(featureFinder);
                return lcmsFeatures;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Builds anchor point matches from peptide sequences that occur in both peptide sets,
        /// keeping only the pairs whose NET and m/z differences are inside the tolerances.
        /// </summary>
        /// <param name="peptidesA">Peptides used for the X anchor points.</param>
        /// <param name="peptidesB">Peptides used for the Y anchor points.</param>
        /// <param name="options">Supplies NetTolerance and MzTolerance.</param>
        /// <returns>The anchor point matches that passed both tolerance checks.</returns>
        public IEnumerable <SpectralAnchorPointMatch> FindAnchorPoints(IEnumerable <Peptide> peptidesA,
                                                                       IEnumerable <Peptide> peptidesB,
                                                                       SpectralOptions options)
        {
            peptidesA = AssignNET(peptidesA);
            peptidesB = AssignNET(peptidesB);

            // One peptide per sequence on each side.
            var bestBySequenceA = PeptideUtility.MapWithBestSequence(peptidesA);
            var bestBySequenceB = PeptideUtility.MapWithBestSequence(peptidesB);

            var anchorPoints = new List <SpectralAnchorPointMatch>();
            foreach (var sequence in bestBySequenceB.Keys)
            {
                // Only sequences seen on both sides can form an anchor point.
                if (!bestBySequenceA.ContainsKey(sequence))
                {
                    continue;
                }

                var candidate = new SpectralAnchorPointMatch
                {
                    AnchorPointX = { Peptide = bestBySequenceA[sequence] },
                    AnchorPointY = { Peptide = bestBySequenceB[sequence] }
                };

                var netDelta = candidate.AnchorPointX.Net - candidate.AnchorPointY.Net;
                var mzDelta  = candidate.AnchorPointX.Mz - candidate.AnchorPointY.Mz;

                if (Math.Abs(netDelta) < options.NetTolerance && Math.Abs(mzDelta) < options.MzTolerance)
                {
                    anchorPoints.Add(candidate);
                }
            }

            return anchorPoints;
        }
Beispiel #5
0
        /// <summary>
        ///     Creates the dataset and database aligners through the aligner factory
        ///     and stores both of them on the provider.
        /// </summary>
        /// <param name="options">Alignment options; its AlignmentAlgorithmType selects the aligner.</param>
        /// <param name="spectralOptions">Spectral options forwarded to the factory.</param>
        public void BuildAligner(LcmsWarpAlignmentOptions options, SpectralOptions spectralOptions)
        {
            var datasetAligner = FeatureAlignerFactory.CreateDatasetAligner(
                options.AlignmentAlgorithmType,
                options,
                spectralOptions);
            m_provider.DatasetAligner = datasetAligner;

            var databaseAligner = FeatureAlignerFactory.CreateDatabaseAligner(
                options.AlignmentAlgorithmType,
                options,
                spectralOptions);
            m_provider.DatabaseAligner = databaseAligner;
        }
Beispiel #6
0
        /// <summary>
        ///     Builds the dataset aligner and the database aligner for the configured
        ///     alignment algorithm and assigns them to the provider.
        /// </summary>
        /// <param name="options">Alignment options; its AlignmentAlgorithmType selects the aligner.</param>
        /// <param name="spectralOptions">Spectral options forwarded to the factory.</param>
        public void BuildAligner(LcmsWarpAlignmentOptions options, SpectralOptions spectralOptions)
        {
            // Both aligners are created for the same algorithm type.
            var algorithmType = options.AlignmentAlgorithmType;

            m_provider.DatasetAligner =
                FeatureAlignerFactory.CreateDatasetAligner(algorithmType, options, spectralOptions);

            m_provider.DatabaseAligner =
                FeatureAlignerFactory.CreateDatabaseAligner(algorithmType, options, spectralOptions);
        }
Beispiel #7
0
 /// <summary>
 /// Creates an analysis with default options, empty match and dataset name lists,
 /// and fresh mass / NET alignment measurements.
 /// </summary>
 public SpectralAnalysis()
 {
     // Configuration and bookkeeping.
     Options      = new SpectralOptions();
     DatasetNames = new List <string>();
     Matches      = new List <SpectralAnchorPointMatch>();

     // Measurement containers.
     MassData     = new AlignmentMeasurement <double>();
     NetData      = new AlignmentMeasurement <double>();
 }
Beispiel #8
0
        /// <summary>
        /// Classifies every anchor point match by comparing the peptides mapped to its two scans.
        /// </summary>
        /// <remarks>
        /// A match is flagged PeptideFailed when either scan has no (non-null) peptide in its map.
        /// Otherwise both peptide sequences are cleaned in place, the peptides are attached to the
        /// anchor points, and the match is flagged TrueMatch when the cleaned sequences are equal
        /// and not blank, or FalseMatch if not.
        /// </remarks>
        /// <param name="matches">Matches to classify; IsValidMatch is set on each of them.</param>
        /// <param name="peptideMapX">Peptides keyed by scan, looked up with the X anchor point scan.</param>
        /// <param name="peptideMapY">Peptides keyed by scan, looked up with the Y anchor point scan.</param>
        /// <param name="options">Not read by this method.</param>
        public void Match(IEnumerable <SpectralAnchorPointMatch> matches,
                          Dictionary <int, Peptide> peptideMapX,
                          Dictionary <int, Peptide> peptideMapY,
                          SpectralOptions options)
        {
            foreach (var match in matches)
            {
                var scanX = match.AnchorPointX.Scan;
                var scanY = match.AnchorPointY.Scan;

                // A single TryGetValue replaces the ContainsKey + indexer double lookup.
                Peptide peptideX;
                if (!peptideMapX.TryGetValue(scanX, out peptideX) || peptideX == null)
                {
                    match.IsValidMatch = AnchorPointMatchType.PeptideFailed;
                    continue;
                }

                Peptide peptideY;
                if (!peptideMapY.TryGetValue(scanY, out peptideY) || peptideY == null)
                {
                    match.IsValidMatch = AnchorPointMatchType.PeptideFailed;
                    continue;
                }

                peptideX.Sequence = PeptideUtility.CleanString(peptideX.Sequence);
                peptideY.Sequence = PeptideUtility.CleanString(peptideY.Sequence);

                // The static string.Equals is null-safe, so a null cleaned sequence yields a
                // FalseMatch instead of a NullReferenceException. The comparison stays ordinal.
                var sameSequence = !string.IsNullOrWhiteSpace(peptideY.Sequence) &&
                                   string.Equals(peptideX.Sequence, peptideY.Sequence);

                // Both true and false matches keep a link to the peptides that were compared.
                match.AnchorPointX.Peptide = peptideX;
                match.AnchorPointY.Peptide = peptideY;
                match.IsValidMatch         = sameSequence
                    ? AnchorPointMatchType.TrueMatch
                    : AnchorPointMatchType.FalseMatch;
            }
        }
Beispiel #9
0
        /// <summary>
        /// Counts the peptide pairs of two datasets that share a sequence and lie within the
        /// NET and m/z tolerances, then prints the dataset names and the count to the console.
        /// </summary>
        /// <param name="datasetX">Dataset whose PeptideFile is read for the X peptides.</param>
        /// <param name="datasetY">Dataset whose PeptideFile is read for the Y peptides.</param>
        /// <param name="scanDataX">Scan summaries used to find the X scan extrema.</param>
        /// <param name="scanDataY">Scan summaries used to find the Y scan extrema.</param>
        /// <param name="names">Names written to the console, one per line.</param>
        /// <param name="options">Supplies IdScore, Fdr, NetTolerance and MzTolerance.</param>
        private void MatchPeptides(AlignmentDataset datasetX,
                                   AlignmentDataset datasetY,
                                   Dictionary <int, ScanSummary> scanDataX,
                                   Dictionary <int, ScanSummary> scanDataY,
                                   IEnumerable <string> names,
                                   SpectralOptions options)
        {
            // Read the peptide identifications of both datasets.
            var peptideReader = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
            var peptidesA     = peptideReader.Read(datasetX.PeptideFile);
            var peptidesB     = peptideReader.Read(datasetY.PeptideFile);

            peptidesA = peptidesA.Where(x => PeptideUtility.PassesCutoff(x, options.IdScore, options.Fdr)).ToList();
            peptidesB = peptidesB.Where(x => PeptideUtility.PassesCutoff(x, options.IdScore, options.Fdr)).ToList();

            var bestScanX = PeptideUtility.MapWithBestScan(peptidesA);
            var bestScanY = PeptideUtility.MapWithBestScan(peptidesB);

            // Keys of the entries holding the largest / smallest Scan value.
            var lastX  = scanDataX.Aggregate((best, next) => best.Value.Scan > next.Value.Scan ? best : next).Key;
            var firstX = scanDataX.Aggregate((best, next) => best.Value.Scan < next.Value.Scan ? best : next).Key;
            var lastY  = scanDataY.Aggregate((best, next) => best.Value.Scan > next.Value.Scan ? best : next).Key;
            var firstY = scanDataY.Aggregate((best, next) => best.Value.Scan < next.Value.Scan ? best : next).Key;

            // Compare every X peptide with every Y peptide.
            var matchCount = 0;
            foreach (var scanX in bestScanX.Keys)
            {
                var candidateX = bestScanX[scanX];
                foreach (var scanY in bestScanY.Keys)
                {
                    var candidateY = bestScanY[scanY];

                    // Scan numbers scaled by the scan range of their dataset.
                    var netX     = Convert.ToDouble(scanX - firstX) / Convert.ToDouble(lastX - firstX);
                    var netY     = Convert.ToDouble(scanY - firstY) / Convert.ToDouble(lastY - firstY);
                    var netDelta = Convert.ToDouble(netX - netY);

                    // The comparisons are kept in their "<" form so that a NaN difference
                    // (a zero scan range) still rejects the pair.
                    if (Math.Abs(netDelta) < options.NetTolerance &&
                        Math.Abs(candidateX.Mz - candidateY.Mz) < options.MzTolerance &&
                        PeptideUtility.PassesCutoff(candidateX, options.IdScore, options.Fdr) &&
                        PeptideUtility.PassesCutoff(candidateY, options.IdScore, options.Fdr) &&
                        candidateX.Sequence.Equals(candidateY.Sequence))
                    {
                        matchCount++;
                    }
                }
            }

            Console.WriteLine();
            foreach (var datasetName in names)
            {
                Console.WriteLine(datasetName);
            }
            Console.WriteLine(@"Matches - {0}", matchCount);
        }
        /// <summary>
        /// Creates the dataset-to-dataset aligner that corresponds to the requested alignment type.
        /// </summary>
        /// <param name="type">Alignment algorithm to create an aligner for.</param>
        /// <param name="options">Options assigned to the LCMSWarp aligner.</param>
        /// <param name="spectralOptions">Options assigned to the spectral aligner.</param>
        /// <returns>The aligner for <paramref name="type"/>, or null when the type is not one of the handled cases.</returns>
        public static IFeatureAligner<IEnumerable<UMCLight>, IEnumerable<UMCLight>, classAlignmentData> CreateDatasetAligner(FeatureAlignmentType type,
            LcmsWarpAlignmentOptions options,
            SpectralOptions spectralOptions)
        {
            switch (type)
            {
                case FeatureAlignmentType.LCMS_WARP:
                    return new LcmsWarpFeatureAligner {Options = options};

                case FeatureAlignmentType.DIRECT_IMS_INFUSION:
                    return new DummyAlignment();

                case FeatureAlignmentType.SPECTRAL_ALIGNMENT:
                    return new SpectralAlignerWrapper {Options = spectralOptions, Bandwidth = Bandwidth};

                default:
                    // Unhandled alignment types yield no aligner.
                    return null;
            }
        }
Beispiel #11
0
        /// <summary>
        /// Finds spectral anchor points between two spectra providers, validates them against
        /// the peptide files of both datasets and wraps the result in an analysis object.
        /// </summary>
        /// <param name="comparerType">Selects the spectral comparer that is created.</param>
        /// <param name="readerX">Spectra provider for the first dataset.</param>
        /// <param name="readerY">Spectra provider for the second dataset.</param>
        /// <param name="options">Options forwarded to the finder and validator and stored on the analysis.</param>
        /// <param name="datasetX">Dataset whose PeptideFile is read for validation.</param>
        /// <param name="datasetY">Dataset whose PeptideFile is read for validation.</param>
        /// <param name="names">Dataset names stored on the analysis.</param>
        /// <returns>The analysis holding the names, matches and options.</returns>
        protected static SpectralAnalysis MatchDatasets(SpectralComparison comparerType,
                                                        ISpectraProvider readerX,
                                                        ISpectraProvider readerY,
                                                        SpectralOptions options,
                                                        AlignmentDataset datasetX,
                                                        AlignmentDataset datasetY,
                                                        List <string> names)
        {
            var msgfReader      = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
            var anchorFinder    = new SpectralAnchorPointFinder();
            var matchValidator  = new SpectralAnchorPointValidator();
            var spectraComparer = SpectralComparerFactory.CreateSpectraComparer(comparerType);
            var spectraFilter   = SpectrumFilterFactory.CreateFilter(SpectraFilters.TopPercent);

            // Find the candidate anchor points from the spectra alone.
            var anchorMatches = anchorFinder.FindAnchorPoints(readerX, readerY, spectraComparer, spectraFilter, options);

            // Then validate them with the peptide identifications of both datasets.
            matchValidator.ValidateMatches(anchorMatches,
                                           msgfReader.Read(datasetX.PeptideFile),
                                           msgfReader.Read(datasetY.PeptideFile),
                                           options);

            return new SpectralAnalysis
            {
                DatasetNames = names,
                Matches      = anchorMatches,
                Options      = options
            };
        }
Beispiel #12
0
        /// <summary>
        /// Matches every pair of datasets that have a *.mscache file in a directory
        /// and writes one alignment report per pair.
        /// </summary>
        /// <param name="directory">Directory searched for *.mscache files.</param>
        /// <param name="comparerType">Spectral comparer type used for matching.</param>
        /// <param name="mzBinSize">Stored in the options as MzBinSize.</param>
        /// <param name="mzTolerance">Stored in the options as MzTolerance.</param>
        /// <param name="netTolerance">Stored in the options as NetTolerance.</param>
        /// <param name="similarityScoreCutoff">Stored in the options as SimilarityCutoff.</param>
        /// <param name="peptideScore">Stored in the options as IdScore.</param>
        /// <param name="peptideFdr">Stored in the options as Fdr.</param>
        /// <param name="ionPercent">Stored in the options as TopIonPercent.</param>
        /// <param name="numberOfRequiredPeaks">Stored in the options as RequiredPeakCount.</param>
        /// <param name="name">Report name prefix; the running comparison index is appended to it.</param>
        public void GenerateFigure4_MetaMatches(string directory,
                                                SpectralComparison comparerType,
                                                double mzBinSize,
                                                double mzTolerance,
                                                double netTolerance,
                                                double similarityScoreCutoff,
                                                double peptideScore,
                                                double peptideFdr,
                                                double ionPercent,
                                                int numberOfRequiredPeaks,
                                                string name)
        {
            // NOTE(review): the report location is hard-coded to a local drive.
            AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

            Console.WriteLine(@"Post-Pre Tests For {0}", directory);

            var cacheFiles = Directory.GetFiles(directory, "*.mscache");

            Console.WriteLine(@"Building data cache");
            var data = cacheFiles.Select(path => new FigureBase.PathCache {
                Cache = path
            }).ToList();

            // The options for the analysis
            var options = new SpectralOptions
            {
                MzBinSize         = mzBinSize,
                MzTolerance       = mzTolerance,
                NetTolerance      = netTolerance,
                SimilarityCutoff  = similarityScoreCutoff,
                TopIonPercent     = ionPercent,
                IdScore           = peptideScore,
                ComparerType      = comparerType,
                Fdr               = peptideFdr,
                RequiredPeakCount = numberOfRequiredPeaks
            };

            // Running index that makes every report name unique.
            var comparison = 0;

            for (var i = 0; i < data.Count; i++)
            {
                var cachex = data[i];
                // Get the raw path stored in the cache file...
                // then get the dataset object
                var rawPathX = ScanSummaryCache.ReadPath(cachex.Cache);
                var datasetX = new AlignmentDataset(rawPathX, "", cachex.Msgf);

                // create a raw file reader for the datasets
                using (var readerX = new InformedProteomicsReader())
                {
                    // wrap it in the cached object so we can load scan meta-data
                    var cacheReaderX = new RawLoaderCache(readerX);
                    var cacheDataX   = ScanSummaryCache.ReadCache(cachex.Cache);

                    readerX.AddDataFile(rawPathX, 0);
                    cacheReaderX.AddCache(0, cacheDataX);

                    // Pair dataset i with every later dataset so each pair is visited once.
                    for (var j = i + 1; j < data.Count; j++)
                    {
                        var cachey = data[j];
                        // Get the raw path stored in the cache file...
                        // then get the dataset object
                        var rawPathY = ScanSummaryCache.ReadPath(cachey.Cache);
                        var datasetY = new AlignmentDataset(rawPathY, "", cachey.Msgf);

                        // create a raw file reader for the datasets
                        using (var readerY = new InformedProteomicsReader())
                        {
                            // Then the writer for creating a report
                            var writer =
                                AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3, name + comparison);
                            comparison++;

                            try
                            {
                                // wrap it in the cached object so we can load scan meta-data
                                var cacheReaderY = new RawLoaderCache(readerY);
                                var cacheDataY   = ScanSummaryCache.ReadCache(cachey.Cache);
                                cacheReaderY.AddCache(0, cacheDataY);
                                readerY.AddDataFile(rawPathY, 0);
                                var names = new List <string> {
                                    data[i].Cache, data[j].Cache
                                };

                                // NOTE(review): the raw readers are passed here, not the cache
                                // wrappers built above - confirm that this is intended.
                                var analysis = MatchDatasets(comparerType,
                                                             readerX,
                                                             readerY,
                                                             options,
                                                             datasetX,
                                                             datasetY,
                                                             names);

                                AlignMatches(analysis, writer);
                            }
                            finally
                            {
                                // Close the report even when matching or alignment throws.
                                writer.Close();
                            }
                        }
                    }
                }
            }
        }
Beispiel #13
0
        /// <summary>
        /// Matches every pair of datasets that have a *.mscache file in a directory
        /// and writes one alignment report per pair.
        /// </summary>
        /// <param name="directory">Directory searched for *.mscache files.</param>
        /// <param name="comparerType">Spectral comparer type used for matching.</param>
        /// <param name="mzBinSize">Stored in the options as MzBinSize.</param>
        /// <param name="mzTolerance">Stored in the options as MzTolerance.</param>
        /// <param name="netTolerance">Stored in the options as NetTolerance.</param>
        /// <param name="similarityScoreCutoff">Stored in the options as SimilarityCutoff.</param>
        /// <param name="peptideScore">Stored in the options as IdScore.</param>
        /// <param name="peptideFdr">Stored in the options as Fdr.</param>
        /// <param name="ionPercent">Stored in the options as TopIonPercent.</param>
        /// <param name="numberOfRequiredPeaks">Stored in the options as RequiredPeakCount.</param>
        /// <param name="name">Report name prefix; the running comparison index is appended to it.</param>
        public void GenerateFigure4_MetaMatches(string directory,
            SpectralComparison comparerType,
            double mzBinSize,
            double mzTolerance,
            double netTolerance,
            double similarityScoreCutoff,
            double peptideScore,
            double peptideFdr,
            double ionPercent,
            int numberOfRequiredPeaks,
            string name)
        {
            // NOTE(review): the report location is hard-coded to a local drive.
            AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

            Console.WriteLine(@"Post-Pre Tests For {0}", directory);

            var cacheFiles = Directory.GetFiles(directory, "*.mscache");
            Console.WriteLine(@"Building data cache");
            var data = cacheFiles.Select(path => new FigureBase.PathCache { Cache = path }).ToList();

            // The options for the analysis
            var options = new SpectralOptions
            {
                MzBinSize = mzBinSize,
                MzTolerance = mzTolerance,
                NetTolerance = netTolerance,
                SimilarityCutoff = similarityScoreCutoff,
                TopIonPercent = ionPercent,
                IdScore = peptideScore,
                ComparerType = comparerType,
                Fdr = peptideFdr,
                RequiredPeakCount = numberOfRequiredPeaks
            };

            // Running index that makes every report name unique.
            var comparison = 0;
            for (var i = 0; i < data.Count; i++)
            {
                var cachex = data[i];
                // Get the raw path stored in the cache file...
                // then get the dataset object
                var rawPathX = ScanSummaryCache.ReadPath(cachex.Cache);
                var datasetX = new AlignmentDataset(rawPathX, "", cachex.Msgf);

                // create a raw file reader for the datasets
                using (var readerX = RawLoaderFactory.CreateFileReader(datasetX.RawFile))
                {
                    // wrap it in the cached object so we can load scan meta-data
                    var cacheReaderX = new RawLoaderCache(readerX);
                    var cacheDataX = ScanSummaryCache.ReadCache(cachex.Cache);

                    readerX.AddDataFile(rawPathX, 0);
                    cacheReaderX.AddCache(0, cacheDataX);

                    // Pair dataset i with every later dataset so each pair is visited once.
                    for (var j = i + 1; j < data.Count; j++)
                    {
                        var cachey = data[j];
                        // Get the raw path stored in the cache file...
                        // then get the dataset object
                        var rawPathY = ScanSummaryCache.ReadPath(cachey.Cache);
                        var datasetY = new AlignmentDataset(rawPathY, "", cachey.Msgf);

                        // create a raw file reader for the datasets
                        using (var readerY = RawLoaderFactory.CreateFileReader(datasetY.RawFile))
                        {
                            // Then the writer for creating a report
                            var writer =
                                AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3, name + comparison);
                            comparison++;

                            try
                            {
                                // wrap it in the cached object so we can load scan meta-data
                                var cacheReaderY = new RawLoaderCache(readerY);
                                var cacheDataY = ScanSummaryCache.ReadCache(cachey.Cache);
                                cacheReaderY.AddCache(0, cacheDataY);
                                readerY.AddDataFile(rawPathY, 0);
                                var names = new List<string> { data[i].Cache, data[j].Cache };

                                // NOTE(review): the raw readers are passed here, not the cache
                                // wrappers built above - confirm that this is intended.
                                var analysis = MatchDatasets(comparerType,
                                    readerX,
                                    readerY,
                                    options,
                                    datasetX,
                                    datasetY,
                                    names);

                                AlignMatches(analysis, writer);
                            }
                            finally
                            {
                                // Close the report even when matching or alignment throws.
                                writer.Close();
                            }
                        }
                    }
                }
            }
        }
Beispiel #14
0
        /// <summary>
        /// Matches every pair of datasets in a directory that have both a *.mscache file and a
        /// *_msgfdb_fht.txt file, and writes one alignment report per pair.
        /// </summary>
        /// <param name="directory">Directory searched for the cache and MSGF files.</param>
        /// <param name="comparerType">Spectral comparer type used for matching.</param>
        /// <param name="mzBinSize">Stored in the options as MzBinSize.</param>
        /// <param name="mzTolerance">Stored in the options as MzTolerance.</param>
        /// <param name="netTolerance">Stored in the options as NetTolerance.</param>
        /// <param name="similarityScoreCutoff">Stored in the options as SimilarityCutoff.</param>
        /// <param name="peptideScore">Stored in the options as IdScore.</param>
        /// <param name="peptideFdr">Stored in the options as Fdr.</param>
        /// <param name="ionPercent">Stored in the options as TopIonPercent.</param>
        /// <param name="numberOfRequiredPeaks">Stored in the options as RequiredPeakCount.</param>
        public void GenerateFigure3_Matches(string directory,
            SpectralComparison comparerType,
            double mzBinSize,
            double mzTolerance,
            double netTolerance,
            double similarityScoreCutoff,
            double peptideScore,
            double peptideFdr,
            double ionPercent,
            int numberOfRequiredPeaks)
        {
            // NOTE(review): the report location is hard-coded and points at a "figure4"
            // folder although this method produces figure 3 - confirm.
            AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

            Console.WriteLine(@"Post-Pre Tests For {0}", directory);

            var cacheFiles = Directory.GetFiles(directory, "*.mscache");
            var msgfFiles = Directory.GetFiles(directory, "*_msgfdb_fht.txt");

            Console.WriteLine(@"Building data cache");

            // Only membership is needed, so the lower-cased cache paths go into a set.
            // Unlike the previous dictionary, the set does not throw on duplicate entries.
            var knownCaches = new HashSet<string>(cacheFiles.Select(path => path.ToLower()));

            // Keep the MSGF files that have a matching cache file. The features path is
            // built from the same directory and name as the cache path, so they are equal.
            var data = (from path in msgfFiles
                        let name = path.ToLower().Replace("_msgfdb_fht.txt", ".mscache")
                        let newName = Path.Combine(directory, name)
                        where knownCaches.Contains(newName)
                        select new FigureBase.PathCache { Cache = newName, Msgf = path, Features = newName }).ToList();

            // The options for the analysis
            var options = new SpectralOptions
            {
                MzBinSize = mzBinSize,
                MzTolerance = mzTolerance,
                NetTolerance = netTolerance,
                SimilarityCutoff = similarityScoreCutoff,
                TopIonPercent = ionPercent,
                IdScore = peptideScore,
                ComparerType = comparerType,
                Fdr = peptideFdr,
                RequiredPeakCount = numberOfRequiredPeaks
            };

            Console.WriteLine(@"{0}", data.Count);

            // Running index that makes every report name unique.
            var comparison = 0;
            for (var i = 0; i < data.Count; i++)
            {
                var cachex = data[i];
                // Get the raw path stored in the cache file...
                // then get the dataset object
                var rawPathX = ScanSummaryCache.ReadPath(cachex.Cache);
                var datasetX = new AlignmentDataset(rawPathX, "", cachex.Msgf);

                // create a raw file reader for the datasets
                using (var readerX = RawLoaderFactory.CreateFileReader(datasetX.RawFile))
                {
                    // wrap it in the cached object so we can load scan meta-data
                    var cacheReaderX = new RawLoaderCache(readerX);
                    var cacheDataX = ScanSummaryCache.ReadCache(cachex.Cache);

                    readerX.AddDataFile(rawPathX, 0);
                    cacheReaderX.AddCache(0, cacheDataX);

                    // Pair dataset i with every later dataset so each pair is visited once.
                    for (var j = i + 1; j < data.Count; j++)
                    {
                        // Then the writer for creating a report
                        var writer =
                            AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3,
                                "results-figure3-largeScale" + comparison);
                        comparison++;

                        try
                        {
                            var cachey = data[j];
                            // Get the raw path stored in the cache file...
                            // then get the dataset object
                            var rawPathY = ScanSummaryCache.ReadPath(cachey.Cache);
                            var datasetY = new AlignmentDataset(rawPathY, "", cachey.Msgf);

                            // create a raw file reader for the datasets
                            using (var readerY = RawLoaderFactory.CreateFileReader(datasetY.RawFile))
                            {
                                // wrap it in the cached object so we can load scan meta-data
                                var cacheReaderY = new RawLoaderCache(readerY);
                                var cacheDataY = ScanSummaryCache.ReadCache(cachey.Cache);
                                cacheReaderY.AddCache(0, cacheDataY);
                                readerY.AddDataFile(rawPathY, 0);
                                var names = new List<string> { data[i].Cache, data[j].Cache };

                                // Write the results
                                var analysis = MatchDatasets(comparerType,
                                    cacheReaderX,
                                    cacheReaderY,
                                    options,
                                    datasetX,
                                    datasetY,
                                    names);

                                AlignMatches(analysis, writer);
                            }
                        }
                        finally
                        {
                            // The writer used to be left open; close it even when
                            // matching or alignment throws.
                            writer.Close();
                        }
                    }
                }
            }
        }
Beispiel #15
0
        /// <summary>
        ///     Finds spectral anchor point matches between two datasets, passes them through the
        ///     anchor point validator together with each dataset's MSGF peptides, and packages the result.
        /// </summary>
        /// <param name="comparerType">Spectral comparison method used to build the spectra comparer.</param>
        /// <param name="readerX">Spectra provider for the first dataset.</param>
        /// <param name="readerY">Spectra provider for the second dataset.</param>
        /// <param name="options">Options handed to both the anchor point finder and the validator.</param>
        /// <param name="datasetX">First dataset; its peptide file is read for validation.</param>
        /// <param name="datasetY">Second dataset; its peptide file is read for validation.</param>
        /// <param name="names">Dataset names stored on the returned analysis.</param>
        /// <returns>An analysis holding the dataset names, the matches and the options used.</returns>
        protected static SpectralAnalysis MatchDatasets(SpectralComparison comparerType,
            ISpectraProvider readerX,
            ISpectraProvider readerY,
            SpectralOptions options,
            AlignmentDataset datasetX,
            AlignmentDataset datasetY,
            List<string> names)
        {
            // Build every collaborator up front
            var msgfReader       = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
            var anchorFinder     = new SpectralAnchorPointFinder();
            var anchorValidator  = new SpectralAnchorPointValidator();
            var spectraComparer  = SpectralComparerFactory.CreateSpectraComparer(comparerType);
            var topPercentFilter = SpectrumFilterFactory.CreateFilter(SpectraFilters.TopPercent);

            // Step 1: spectral matching between the two providers
            var matches = anchorFinder.FindAnchorPoints(readerX, readerY, spectraComparer, topPercentFilter, options);

            // Step 2: validate the matches using the peptide identifications of each dataset
            var peptidesX = msgfReader.Read(datasetX.PeptideFile);
            var peptidesY = msgfReader.Read(datasetY.PeptideFile);
            anchorValidator.ValidateMatches(matches, peptidesX, peptidesY, options);

            return new SpectralAnalysis
            {
                DatasetNames = names,
                Matches = matches,
                Options = options
            };
        }
Beispiel #16
0
        /// <summary>
        ///     Runs the MultiAlign analysis: builds LCMS features for the baseline and for each alignee
        ///     dataset, links peptide identifications to them, clusters the combined features before and
        ///     after alignment, and reports the same/different cluster counts for both passes.
        /// </summary>
        /// <param name="baselineDataset">Dataset every alignee is aligned against.</param>
        /// <param name="aligneeDatasets">Datasets aligned to the baseline, one at a time.</param>
        /// <param name="featureFindingOptions">Options handed to <paramref name="featureFinder"/>.</param>
        /// <param name="msFilterOptions">Filter applied to the loaded MS features of every dataset.</param>
        /// <param name="lcmsFilterOptions">Not used by this method.</param>
        /// <param name="peptideOptions">Supplies the FDR and identification score used when linking peptides.</param>
        /// <param name="featureFinder">Algorithm that turns MS features into LCMS features.</param>
        /// <param name="aligner">Aligner; its spectra providers are set here before each alignment.</param>
        /// <param name="clusterer">Clusterer run on the combined features before and after alignment.</param>
        /// <param name="matchPath">Path passed to SaveMatches for each alignee.</param>
        /// <param name="errorPath">Path passed to WriteErrors for each alignee.</param>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
                                              IEnumerable <DatasetInformation> aligneeDatasets,
                                              LcmsFeatureFindingOptions featureFindingOptions,
                                              MsFeatureFilteringOptions msFilterOptions,
                                              LcmsFeatureFilteringOptions lcmsFilterOptions,
                                              SpectralOptions peptideOptions,
                                              MultiAlignCore.Algorithms.FeatureFinding.IFeatureFinder featureFinder,
                                              IFeatureAligner <IEnumerable <UMCLight>,
                                                               IEnumerable <UMCLight>,
                                                               AlignmentData> aligner,
                                              IClusterer <UMCLight, UMCClusterLight> clusterer,
                                              string matchPath,
                                              string errorPath)
        {
            try
            {
                UpdateStatus("Loading baseline features.");
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);

                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

                // Load the baseline reference set
                using (var rawProviderX = new InformedProteomicsReader())
                {
                    rawProviderX.AddDataFile(baselineDataset.RawFile.Path, 0);
                    UpdateStatus("Creating Baseline LCMS Features.");
                    var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                                                                      featureFindingOptions,
                                                                      rawProviderX);
                    LinkPeptidesToFeatures(baselineDataset.Sequence.Path, baselineFeatures, peptideOptions.Fdr,
                                           peptideOptions.IdScore);

                    var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                    // Then load each alignee dataset and align it against the baseline
                    // NOTE(review): matchPath/errorPath are the same for every alignee; whether results
                    // accumulate or are overwritten depends on SaveMatches/WriteErrors - confirm.
                    foreach (var dataset in aligneeDatasets)
                    {
                        var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                        aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                        using (var rawProviderY = new InformedProteomicsReader())
                        {
                            rawProviderY.AddDataFile(dataset.RawFile.Path, 0);

                            UpdateStatus("Finding alignee features");
                            var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                                                             featureFindingOptions,
                                                                             rawProviderY);
                            LinkPeptidesToFeatures(dataset.Sequence.Path, aligneeFeatures, peptideOptions.Fdr,
                                                   peptideOptions.IdScore);

                            var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                            // cluster before we do anything else....
                            var allFeatures = new List <UMCLight>();
                            allFeatures.AddRange(baselineFeatures);
                            allFeatures.AddRange(aligneeFeatures);
                            foreach (var feature in allFeatures)
                            {
                                // NOTE(review): this is a self-assignment and changes nothing as written.
                                // Presumably it was meant to seed an aligned NET value from the raw NET,
                                // mirroring the mass line below - confirm the intended target property.
                                feature.Net = feature.Net;
                                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                            }

                            // This tells us the differences before we align.
                            var clusters     = clusterer.Cluster(allFeatures);
                            var preAlignment = AnalyzeClusters(clusters);

                            aligner.AligneeSpectraProvider  = providerY;
                            aligner.BaselineSpectraProvider = providerX;

                            UpdateStatus("Aligning data");
                            var data    = aligner.Align(baselineFeatures, aligneeFeatures);
                            var matches = data.Matches;

                            WriteErrors(errorPath, matches);

                            // Reset the cluster assignments so the second clustering pass starts clean
                            foreach (var feature in allFeatures)
                            {
                                feature.UmcCluster = null;
                                feature.ClusterId  = -1;
                            }

                            // Then cluster after alignment!
                            UpdateStatus("clustering data");
                            clusters = clusterer.Cluster(allFeatures);
                            var postAlignment = AnalyzeClusters(clusters);

                            UpdateStatus("Note\tSame\tDifferent");
                            UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                                       preAlignment.DifferentCluster));
                            UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                                       postAlignment.DifferentCluster));

                            SaveMatches(matchPath, matches);
                        }
                    }
                }
            }
            finally
            {
                // Deregister the algorithms' progress notifiers even when the analysis fails part-way,
                // so a failed run does not leave them registered.
                DeRegisterProgressNotifier(aligner);
                DeRegisterProgressNotifier(featureFinder);
                DeRegisterProgressNotifier(clusterer);
            }
        }
Beispiel #17
0
        /// <summary>
        ///     Counts the peptide pairs (one from each dataset) that agree in normalized elution position,
        ///     m/z and sequence, and prints the dataset names followed by that count to the console.
        /// </summary>
        /// <param name="datasetX">First dataset; its peptide file is read with the MSGF reader.</param>
        /// <param name="datasetY">Second dataset; its peptide file is read with the MSGF reader.</param>
        /// <param name="scanDataX">Scan summaries of the first dataset, used to find its scan extrema.</param>
        /// <param name="scanDataY">Scan summaries of the second dataset, used to find its scan extrema.</param>
        /// <param name="names">Names printed ahead of the match count.</param>
        /// <param name="options">Supplies the score/FDR cutoffs and the NET and m/z tolerances.</param>
        private void MatchPeptides(AlignmentDataset datasetX,
            AlignmentDataset datasetY,
            Dictionary<int, ScanSummary> scanDataX,
            Dictionary<int, ScanSummary> scanDataY,
            IEnumerable<string> names,
            SpectralOptions options)
        {
            // Read the identifications of both datasets, then keep only those passing the cutoffs
            var msgfReader = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
            var allPeptidesX = msgfReader.Read(datasetX.PeptideFile);
            var allPeptidesY = msgfReader.Read(datasetY.PeptideFile);

            var passingX = allPeptidesX.Where(p => PeptideUtility.PassesCutoff(p, options.IdScore, options.Fdr)).ToList();
            var passingY = allPeptidesY.Where(p => PeptideUtility.PassesCutoff(p, options.IdScore, options.Fdr)).ToList();

            var peptideMapX = PeptideUtility.MapWithBestScan(passingX);
            var peptideMapY = PeptideUtility.MapWithBestScan(passingY);

            // Scan extrema: the key of the entry with the largest / smallest Scan value
            var maxX = scanDataX.Aggregate((best, next) => best.Value.Scan > next.Value.Scan ? best : next).Key;
            var minX = scanDataX.Aggregate((best, next) => best.Value.Scan < next.Value.Scan ? best : next).Key;
            var maxY = scanDataY.Aggregate((best, next) => best.Value.Scan > next.Value.Scan ? best : next).Key;
            var minY = scanDataY.Aggregate((best, next) => best.Value.Scan < next.Value.Scan ? best : next).Key;

            var scanRangeX = Convert.ToDouble(maxX - minX);
            var scanRangeY = Convert.ToDouble(maxY - minY);

            // Compare every peptide of X with every peptide of Y
            var count = 0;
            foreach (var scanX in peptideMapX.Keys)
            {
                var peptideX = peptideMapX[scanX];
                var netX = Convert.ToDouble(scanX - minX) / scanRangeX;

                foreach (var scanY in peptideMapY.Keys)
                {
                    var peptideY = peptideMapY[scanY];
                    var netY = Convert.ToDouble(scanY - minY) / scanRangeY;

                    // Conditions stay in positive "<" form so a NaN NET (zero scan range) never counts
                    var isMatch = Math.Abs(netX - netY) < options.NetTolerance
                                  && Math.Abs(peptideX.Mz - peptideY.Mz) < options.MzTolerance
                                  && PeptideUtility.PassesCutoff(peptideX, options.IdScore, options.Fdr)
                                  && PeptideUtility.PassesCutoff(peptideY, options.IdScore, options.Fdr)
                                  && peptideX.Sequence.Equals(peptideY.Sequence);
                    if (isMatch)
                    {
                        count++;
                    }
                }
            }

            Console.WriteLine();
            foreach (var name in names)
            {
                Console.WriteLine(name);
            }
            Console.WriteLine(@"Matches - {0}", count);
        }
Beispiel #18
0
        /// <summary>
        ///     Aligns each dataset stored in a feature database against the one that follows it and
        ///     writes a matches file and an errors file per pair into the output directory.
        /// </summary>
        /// <param name="relativeDatabasePath">Database path, resolved through GetPath.</param>
        /// <param name="relativeName">Output location, resolved through GetOutputPath; created when missing.</param>
        /// <param name="name">Prefix of the generated match and error file names.</param>
        /// <param name="alignmentType">Alignment algorithm to create.</param>
        /// <param name="clusterType">Clustering algorithm to create.</param>
        public void GenerateClusterAlignmentStatistics(string relativeDatabasePath,
            string relativeName,
            string name,
            FeatureAlignmentType alignmentType,
            LcmsFeatureClusteringAlgorithmType clusterType)
        {
            var databasePath = GetPath(relativeDatabasePath);
            var outputPath = GetOutputPath(relativeName);
            if (!Directory.Exists(outputPath))
            {
                Directory.CreateDirectory(outputPath);
            }

            // Connect to the NHibernate database
            var providers = DataAccessFactory.CreateDataAccessProviders(databasePath, false);

            // Alignment and spectral matching options
            var alignmentOptions = new AlignmentOptions();
            var spectralOptions = new SpectralOptions
            {
                ComparerType = SpectralComparison.CosineDotProduct,
                Fdr = .01,
                IdScore = 1e-09,
                MzBinSize = .5,
                MzTolerance = .5,
                NetTolerance = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff = .75,
                TopIonPercent = .8
            };

            // Clustering tolerances start from the LTQ-Orbitrap preset, with the mass tolerance raised by 6
            var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass = preset.Mass + 6,
                Net = preset.NetTolerance,
                DriftTime = preset.DriftTimeTolerance
            };

            UpdateStatus("Retrieving all datasets for test.");
            var datasets = providers.DatasetCache.FindAll();

            // Create our algorithms
            var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
                alignmentOptions.LCMSWarpOptions,
                spectralOptions);
            var clusterer = ClusterFactory.Create(clusterType);
            clusterer.Parameters = new FeatureClusterParameters<UMCLight> { Tolerances = featureTolerances };

            RegisterProgressNotifier(aligner);
            RegisterProgressNotifier(clusterer);

            // Walk consecutive pairs: dataset i is the baseline, dataset i + 1 the alignee
            for (var i = 0; i < datasets.Count - 1; i++)
            {
                var matchPath = Path.Combine(outputPath, string.Format("{0}-{1}-matches.txt", name, i));
                var errorPath = Path.Combine(outputPath, string.Format("{0}-{1}-errors.txt", name, i));

                var baseline = datasets[i];
                var alignee = datasets[i + 1];

                // Load the baseline reference set
                using (var baselineReader = RawLoaderFactory.CreateFileReader(baseline.RawPath))
                {
                    baselineReader.AddDataFile(baseline.RawPath, 0);

                    // Load the alignee set
                    using (var aligneeReader = RawLoaderFactory.CreateFileReader(alignee.RawPath))
                    {
                        aligneeReader.AddDataFile(alignee.RawPath, 0);

                        var baselineFeatures = RetrieveFeatures(baseline.DatasetId, providers);
                        var aligneeFeatures = RetrieveFeatures(alignee.DatasetId, providers);
                        var baselineSpectra = new CachedFeatureSpectraProvider(baselineReader, baselineFeatures);
                        var aligneeSpectra = new CachedFeatureSpectraProvider(aligneeReader, aligneeFeatures);

                        AlignDatasets(baselineFeatures,
                            aligneeFeatures,
                            baselineSpectra,
                            aligneeSpectra,
                            aligner,
                            clusterer,
                            matchPath,
                            errorPath);
                    }
                }
            }
        }
Beispiel #19
0
 /// <summary>
 ///     Creates an aligner with default spectral options, the top-percent spectrum filter
 ///     and the cosine dot product spectral comparer.
 /// </summary>
 public SpectralAligner()
 {
     var defaultOptions = new SpectralOptions();
     Options = defaultOptions;

     var topPercentFilter = SpectrumFilterFactory.CreateFilter(SpectraFilters.TopPercent);
     Filter = topPercentFilter;

     var cosineComparer = SpectralComparerFactory.CreateSpectraComparer(SpectralComparison.CosineDotProduct);
     SpectralComparer = cosineComparer;
 }
Beispiel #20
0
        /// <summary>
        ///     Finds the LCMS features of a dataset, stamps the dataset id on each feature and everything
        ///     beneath it, keeps only the MS features that carry MS/MS spectra, and links peptides to the result.
        /// </summary>
        /// <param name="information">Dataset whose feature, raw and sequence paths are read.</param>
        /// <param name="featureFindingOptions">Options handed to <paramref name="featureFinder"/>.</param>
        /// <param name="msFilterOptions">Filter applied to the loaded MS features.</param>
        /// <param name="lcmsFilterOptions">Filter applied to the LCMS features that were found.</param>
        /// <param name="peptideOptions">Supplies the FDR and identification score used when linking peptides.</param>
        /// <param name="featureFinder">Feature finding algorithm; its progress notifier is deregistered before returning.</param>
        /// <returns>The filtered, tagged LCMS features.</returns>
        private IList<UMCLight> FindFeatures(DatasetInformation information,
            LcmsFeatureFindingOptions featureFindingOptions,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions peptideOptions,
            IFeatureFinder featureFinder)
        {
            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(information.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

            // Open the raw data of this dataset
            using (var rawReader = RawLoaderFactory.CreateFileReader(information.RawPath))
            {
                rawReader.AddDataFile(information.RawPath, 0);

                UpdateStatus("Creating LCMS Features.");
                var features = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawReader);
                features = LcmsFeatureFilters.FilterFeatures(features, lcmsFilterOptions);

                var datasetId = information.DatasetId;
                foreach (var feature in features)
                {
                    // Stamp the dataset id on the feature and on every object hanging off it
                    feature.GroupId = datasetId;
                    foreach (var msFeature in feature.MsFeatures)
                    {
                        msFeature.GroupId = datasetId;
                        foreach (var spectrum in msFeature.MSnSpectra)
                        {
                            spectrum.GroupId = datasetId;
                            foreach (var peptide in spectrum.Peptides)
                            {
                                peptide.GroupId = datasetId;
                            }
                        }
                    }

                    // Keep only MS features with MS/MS spectra so we dont have a ton of MS features
                    // in the database. The survivors are materialized before the list is cleared.
                    var withMsMs = feature.MsFeatures.Where(x => x.MSnSpectra.Count > 0).ToList();
                    feature.MsFeatures.Clear();
                    feature.MsFeatures.AddRange(withMsMs);
                }

                LinkPeptidesToFeatures(information.SequencePath, features, peptideOptions.Fdr, peptideOptions.IdScore);

                DeRegisterProgressNotifier(featureFinder);
                return features;
            }
        }
        /// <summary>
        ///     Finds the supported datasets in a directory and runs the MultiAlign analysis with the
        ///     first dataset as the baseline and all remaining datasets as alignees.
        /// </summary>
        /// <param name="directory">Directory searched (top level only) for dataset files.</param>
        /// <param name="outputPath">Prefix of the match ("{prefix}.txt") and error ("{prefix}-errors.txt") files.</param>
        /// <param name="alignmentType">Alignment algorithm to create.</param>
        /// <param name="clusterType">Clustering algorithm to create.</param>
        public void TestClustering(
            string directory,
            string outputPath,
            FeatureAlignmentType alignmentType,
            LcmsFeatureClusteringAlgorithmType clusterType)
        {
            // Index of the dataset that acts as the baseline
            const int baselineIndex = 0;

            var matchPath = string.Format("{0}.txt", outputPath);
            var errorPath = string.Format("{0}-errors.txt", outputPath);

            // One wildcard pattern per supported MultiAlign file type
            var extensions = DatasetInformation.SupportedFileTypes
                .Select(x => "*" + x.Extension)
                .ToList();

            // Find our datasets
            var inputFiles = DatasetSearcher.FindDatasets(directory, extensions, SearchOption.TopDirectoryOnly);
            var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

            // Alignment and spectral matching options
            var alignmentOptions = new AlignmentOptions();
            var spectralOptions = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = .01,
                IdScore           = 1e-09,
                MzBinSize         = .5,
                MzTolerance       = .5,
                NetTolerance      = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = .75,
                TopIonPercent     = .8
            };

            // Tolerances start from the LTQ-Orbitrap preset, with the mass tolerance raised by 6
            var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass      = preset.Mass + 6,
                Net       = preset.NetTolerance,
                DriftTime = preset.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };

            // Create our algorithms
            var finder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var aligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
                alignmentOptions.LCMSWarpOptions,
                spectralOptions);
            var clusterer = ClusterFactory.Create(clusterType);
            clusterer.Parameters = new FeatureClusterParameters<UMCLight> { Tolerances = featureTolerances };

            RegisterProgressNotifier(aligner);
            RegisterProgressNotifier(finder);
            RegisterProgressNotifier(clusterer);

            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRange = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity           = 5000,
                ChargeRange                = new FilterRange(1, 6),
                ShouldUseChargeFilter      = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };

            // Everything except the baseline is aligned to it
            var aligneeDatasets = datasets.Where((dataset, index) => index != baselineIndex).ToList();
            PerformMultiAlignAnalysis(datasets[0],
                aligneeDatasets,
                featureFindingOptions,
                msFilterOptions,
                lcmsFilters,
                spectralOptions,
                finder,
                aligner,
                clusterer,
                matchPath,
                errorPath);
        }
Beispiel #22
0
        /// <summary>
        ///     Runs the spectral matching analysis on every pair of datasets in a directory that have both
        ///     a scan cache (*.mscache) and an MSGF result (*_msgfdb_fht.txt), writing one report per pair.
        /// </summary>
        /// <param name="directory">Directory holding the cache and MSGF files.</param>
        /// <param name="comparerType">Spectral comparison method.</param>
        /// <param name="mzBinSize">Stored as the m/z bin size option.</param>
        /// <param name="mzTolerance">Stored as the m/z tolerance option.</param>
        /// <param name="netTolerance">Stored as the NET tolerance option.</param>
        /// <param name="similarityScoreCutoff">Stored as the similarity cutoff option.</param>
        /// <param name="peptideScore">Stored as the identification score option.</param>
        /// <param name="peptideFdr">Stored as the FDR option.</param>
        /// <param name="ionPercent">Stored as the top ion percent option.</param>
        /// <param name="numberOfRequiredPeaks">Stored as the required peak count option.</param>
        public void GenerateFigure3_Matches(string directory,
                                            SpectralComparison comparerType,
                                            double mzBinSize,
                                            double mzTolerance,
                                            double netTolerance,
                                            double similarityScoreCutoff,
                                            double peptideScore,
                                            double peptideFdr,
                                            double ionPercent,
                                            int numberOfRequiredPeaks)
        {
            // NOTE(review): the base path says "figure4" while the reports below are Figure3 - confirm.
            AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

            Console.WriteLine(@"Post-Pre Tests For {0}", directory);

            var cacheFiles = Directory.GetFiles(directory, "*.mscache");
            var msgfFiles = Directory.GetFiles(directory, "*_msgfdb_fht.txt");

            Console.WriteLine(@"Building data cache");

            // Lower-cased cache paths; only the keys are consulted, the values stay null
            var knownCaches = new Dictionary<string, FigureBase.PathCache>();
            foreach (var cacheFile in cacheFiles)
            {
                knownCaches.Add(cacheFile.ToLower(), null);
            }

            // Pair every MSGF file with its cache file, skipping those without one
            var data = new List<FigureBase.PathCache>();
            foreach (var msgfPath in msgfFiles)
            {
                var cacheName = msgfPath.ToLower().Replace("_msgfdb_fht.txt", ".mscache");
                var cachePath = Path.Combine(directory, cacheName);
                var featuresPath = Path.Combine(directory, cacheName);
                if (knownCaches.ContainsKey(cachePath))
                {
                    data.Add(new FigureBase.PathCache
                    {
                        Cache = cachePath,
                        Msgf = msgfPath,
                        Features = featuresPath
                    });
                }
            }

            // The options for the analysis
            var options = new SpectralOptions
            {
                MzBinSize = mzBinSize,
                MzTolerance = mzTolerance,
                NetTolerance = netTolerance,
                SimilarityCutoff = similarityScoreCutoff,
                TopIonPercent = ionPercent,
                IdScore = peptideScore,
                ComparerType = comparerType,
                Fdr = peptideFdr,
                RequiredPeakCount = numberOfRequiredPeaks
            };

            Console.WriteLine(@"{0}", data.Count);

            // Running number that makes each report name unique
            var comparison = 0;

            for (var i = 0; i < data.Count; i++)
            {
                var entryX = data[i];

                // The raw path is stored in the cache file; build the dataset object from it
                var rawPathX = ScanSummaryCache.ReadPath(entryX.Cache);
                var datasetX = new AlignmentDataset(rawPathX, "", entryX.Msgf);

                using (var readerX = new InformedProteomicsReader())
                {
                    // Wrap the raw reader in the cached object so we can load scan meta-data
                    var cachedX = new RawLoaderCache(readerX);
                    var scansX = ScanSummaryCache.ReadCache(entryX.Cache);

                    readerX.AddDataFile(rawPathX, 0);
                    cachedX.AddCache(0, scansX);

                    for (var j = i + 1; j < data.Count; j++)
                    {
                        // Report writer for this pair
                        var reportName = "results-figure3-largeScale" + comparison;
                        var writer = AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3, reportName);
                        comparison++;

                        var entryY = data[j];
                        var rawPathY = ScanSummaryCache.ReadPath(entryY.Cache);
                        var datasetY = new AlignmentDataset(rawPathY, "", entryY.Msgf);

                        using (var readerY = new InformedProteomicsReader())
                        {
                            var cachedY = new RawLoaderCache(readerY);
                            var scansY = ScanSummaryCache.ReadCache(entryY.Cache);
                            cachedY.AddCache(0, scansY);
                            readerY.AddDataFile(rawPathY, 0);

                            var names = new List<string> { data[i].Cache, data[j].Cache };

                            // Match the pair and write the results
                            var analysis = MatchDatasets(comparerType, cachedX, cachedY, options, datasetX, datasetY, names);
                            AlignMatches(analysis, writer);
                        }
                    }
                }
            }
        }
        /// <summary>
        ///     Writes a tab-delimited table (charge, m/z, scan, NET) of the MS features of the first
        ///     dataset in a directory that have at least one peptide on one of their MS/MS spectra.
        /// </summary>
        /// <param name="directory">Directory searched (top level only) for dataset files.</param>
        /// <param name="matchPath">Path of the text file that is created.</param>
        public void TestPeptideBands(string directory,
            string matchPath)
        {
            // One wildcard pattern per supported MultiAlign file type
            var extensions = DatasetInformation.SupportedFileTypes
                .Select(x => "*" + x.Extension)
                .ToList();

            // Find our datasets
            var inputFiles = DatasetSearcher.FindDatasets(directory, extensions, SearchOption.TopDirectoryOnly);
            var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

            // Tolerances come straight from the LTQ-Orbitrap preset
            var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var featureTolerances = new FeatureTolerances
            {
                Mass      = preset.Mass,
                Net       = preset.NetTolerance,
                DriftTime = preset.DriftTimeTolerance
            };

            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity           = 5000,
                ChargeRange                = new FilterRange(1, 6),
                ShouldUseChargeFilter      = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };

            var featureFindingOptions = new LcmsFeatureFindingOptions(featureTolerances)
            {
                MaximumNetRange  = .002,
                MaximumScanRange = 50
            };

            var baselineDataset = datasets[0];

            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);
            var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            var peptideOptions = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = .05,
                IdScore           = 1e-09,
                MzBinSize         = .5,
                MzTolerance       = .5,
                NetTolerance      = .1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = .75,
                TopIonPercent     = .8
            };

            var features = new List<MSFeatureLight>();

            // Load the baseline reference set
            using (var rawReader = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
            {
                rawReader.AddDataFile(baselineDataset.RawPath, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var baselineFeatures = featureFinder.FindFeatures(msFeatures, featureFindingOptions, rawReader);

                LinkPeptidesToFeatures(baselineDataset.SequencePath,
                    baselineFeatures,
                    peptideOptions.Fdr,
                    peptideOptions.IdScore);

                // Flatten to MS features, keep those with MS/MS, and sort by m/z
                foreach (var lcmsFeature in baselineFeatures)
                {
                    features.AddRange(lcmsFeature.MsFeatures);
                }
                features = features.Where(x => x.HasMsMs())
                                   .OrderBy(x => x.Mz)
                                   .ToList();

                // A feature is listed once if any of its spectra carries at least one peptide
                var peptideList = features
                    .Where(feature => feature.MSnSpectra.Any(spectrum => spectrum.Peptides.Any()))
                    .ToList();

                using (var writer = File.CreateText(matchPath))
                {
                    writer.WriteLine("Charge\tpmz\tscan\tNET\t");
                    foreach (var feature in peptideList)
                    {
                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t", feature.ChargeState, feature.Mz, feature.Scan,
                            feature.Net);
                    }
                }
            }
        }
Beispiel #24
0
        /// <summary>
        /// Finds the features of every supported dataset in a directory and caches them
        /// in a freshly created database.
        /// </summary>
        /// <param name="directoryPath">Directory to search for datasets; resolved through GetPath.</param>
        /// <param name="databasePath">Path of the database to create; resolved through GetPath.</param>
        public void CreateFeatureDatabase(string directoryPath, string databasePath)
        {
            var searchDirectory      = GetPath(directoryPath);
            var resolvedDatabasePath = GetPath(databasePath);

            // One wildcard search pattern per supported file type.
            var searchPatterns = new List<string>();
            foreach (var fileType in DatasetLoader.SupportedFileTypes)
            {
                searchPatterns.Add("*" + fileType.Extension);
            }

            // Only the top directory is searched for datasets.
            var loader   = new DatasetLoader();
            var datasets = loader.GetValidDatasets(searchDirectory, searchPatterns, SearchOption.TopDirectoryOnly);

            // Tolerances start from the LTQ-Orbitrap preset, with the mass tolerance widened by 6.
            var preset     = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var tolerances = new FeatureTolerances();
            tolerances.Mass      = preset.Mass + 6;
            tolerances.Net       = preset.NetTolerance;
            tolerances.DriftTime = preset.DriftTimeTolerance;

            var findingOptions = new LcmsFeatureFindingOptions(tolerances);
            findingOptions.MaximumNetRange  = 0.002;
            findingOptions.MaximumScanRange = 50;

            var lcmsFilterOptions = new LcmsFeatureFilteringOptions();
            lcmsFilterOptions.FeatureLengthRangeScans = new FilterRange(50, 300);

            var msFilters = new MsFeatureFilteringOptions();
            msFilters.MinimumIntensity           = 5000;
            msFilters.ChargeRange                = new FilterRange(1, 6);
            msFilters.ShouldUseChargeFilter      = true;
            msFilters.ShouldUseDeisotopingFilter = true;
            msFilters.ShouldUseIntensityFilter   = true;

            var spectralSettings = new SpectralOptions();
            spectralSettings.ComparerType      = SpectralComparison.CosineDotProduct;
            spectralSettings.Fdr               = 0.01;
            spectralSettings.IdScore           = 1e-9;
            spectralSettings.MzBinSize         = 0.5;
            spectralSettings.MzTolerance       = 0.5;
            spectralSettings.NetTolerance      = 0.1;
            spectralSettings.RequiredPeakCount = 32;
            spectralSettings.SimilarityCutoff  = 0.75;
            spectralSettings.TopIonPercent     = 0.8;

            var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            // Create the database first, then open the providers used to persist everything into it.
            NHibernateUtil.CreateDatabase(resolvedDatabasePath);
            var accessProviders = DataAccessFactory.CreateDataAccessProviders(resolvedDatabasePath, true);
            var featureCache    = new FeatureLoader();
            featureCache.Providers = accessProviders;

            // Dataset ids are assigned sequentially, starting at zero, in enumeration order.
            var nextDatasetId = 0;
            foreach (var dataset in datasets)
            {
                dataset.DatasetId = nextDatasetId;
                nextDatasetId++;

                featureCache.CacheFeatures(FindFeatures(dataset,
                                                        findingOptions,
                                                        msFilters,
                                                        lcmsFilterOptions,
                                                        spectralSettings,
                                                        featureFinder));
            }

            // The dataset records themselves are stored only after all features were cached.
            accessProviders.DatasetCache.AddAll(datasets);
        }
Beispiel #25
0
        ///// <summary>
        ///// Links anchor points use the raw spectra provided.
        ///// </summary>
        //public IEnumerable<SpectralAnchorPointMatch> FindAnchorPoints2( ISpectraProvider            readerX,
        //                                                                ISpectraProvider           readerY,
        //                                                                ISpectralComparer          comparer,
        //                                                                ISpectraFilter             filter,
        //                                                                SpectralOptions            options,
        //                                                                bool skipComparison        = true)
        //{
        //    var matches = new List<SpectralAnchorPointMatch>();
        //    var scanDataX  = readerX.GetScanData(0);
        //    var scanDataY  = readerY.GetScanData(0);

        //    // Determine the scan extrema
        //    var maxX = scanDataX.Aggregate((l, r) => l.Value.Scan > r.Value.Scan ? l : r).Key;
        //    var minX = scanDataX.Aggregate((l, r) => l.Value.Scan < r.Value.Scan ? l : r).Key;
        //    var maxY = scanDataY.Aggregate((l, r) => l.Value.Scan > r.Value.Scan ? l : r).Key;
        //    var minY = scanDataY.Aggregate((l, r) => l.Value.Scan < r.Value.Scan ? l : r).Key;

        //    // Create a spectral comparer
        //    var ySpectraCache = new Dictionary<int, MSSpectra>();

        //    // Here we sort the summary spectra....so that we can improve run time efficiency
        //    // and minimize as much memory as possible.
        //    var ySpectraSummary = scanDataY.Values.Where(summary => summary.MsLevel == 2).ToList();
        //    var xSpectraSummary = scanDataX.Values.Where(summary => summary.MsLevel == 2).ToList();

        //    ySpectraSummary.Sort((x, y) => x.PrecursorMZ.CompareTo(y.PrecursorMZ));
        //    xSpectraSummary.Sort((x, y) => x.PrecursorMZ.CompareTo(y.PrecursorMZ));

        //    double mzTolerance = options.MzTolerance;

        //    foreach (var xsum in xSpectraSummary)
        //    {
        //        int scanx = xsum.Scan;

        //        // Grab the first spectra
        //        var spectrumX     = SpectralUtilities.GetSpectra(options.MzBinSize,
        //                                                            options.TopIonPercent,
        //                                                            filter,
        //                                                            readerX,
        //                                                            scanx,
        //                                                            options.RequiredPeakCount);

        //        spectrumX.PrecursorMZ   = xsum.PrecursorMZ;


        //        // Here we make sure that we are efficiently using the cache...we want to clear any
        //        // cached spectra that we arent using.  We know that the summaries are sorted by m/z
        //        // so if the xsum m/z is greater than anything in the cache, dump the spectra...
        //        double currentMz = xsum.PrecursorMZ;
        //        // Use linq?
        //        var toRemove = new List<int>();
        //        foreach (int scan in ySpectraCache.Keys)
        //        {
        //            MSSpectra yscan     = ySpectraCache[scan];
        //            double difference   = currentMz - yscan.PrecursorMZ;
        //            // We only need to care about smaller m/z's
        //            if (difference >= mzTolerance)
        //            {
        //                toRemove.Add(scan);
        //            }
        //            else
        //            {
        //                // Because if we are here, we are within range...AND!
        //                // ...the m/z of i + 1 > i...because they are sorted...
        //                // so if the m/z comes within range (positive) then
        //                // that means we need to evaluate the tolerance.
        //                break;
        //            }
        //        }

        //        // Then we clean up...since spectra can be large...we'll take the performance hit here...
        //        // and minimize memory impacts!
        //        if (toRemove.Count > 0)
        //        {
        //            toRemove.ForEach(x => ySpectraCache.Remove(x));
        //            GC.Collect();
        //            GC.WaitForPendingFinalizers();
        //        }

        //        // Iterate through the other analysis.
        //        foreach (var ysum in ySpectraSummary)
        //        {
        //            int scany = ysum.Scan;

        //            // We know that we are out of range here....
        //            if (Math.Abs(xsum.PrecursorMZ - ysum.PrecursorMZ) >= mzTolerance)
        //                continue;

        //            double netX = Convert.ToDouble(scanx - minX) / Convert.ToDouble(maxX - minX);
        //            double netY = Convert.ToDouble(scany - minY) / Convert.ToDouble(maxY - minY);
        //            double net  = Convert.ToDouble(netX - netY);

        //            // Has to pass the NET tolerance
        //            if (options.NetTolerance < Math.Abs(net)) continue;


        //            // Grab the first spectra...if we have it, great dont re-read
        //            MSSpectra spectrumY = null;
        //            if (ySpectraCache.ContainsKey(scany))
        //            {
        //                if (!skipComparison)
        //                    spectrumY = ySpectraCache[scany];
        //            }
        //            else
        //            {
        //                if (!skipComparison)
        //                {
        //                    spectrumY = SpectralUtilities.GetSpectra(options.MzBinSize,
        //                                                            options.TopIonPercent,
        //                                                            filter,
        //                                                            readerY,
        //                                                            scany,
        //                                                            options.RequiredPeakCount);
        //                    spectrumY.PrecursorMZ = ysum.PrecursorMZ;
        //                    ySpectraCache.Add(scany, spectrumY);
        //                }
        //            }

        //            // compare the spectra
        //            double spectralSimilarity = 0;


        //            if (!skipComparison)
        //                spectralSimilarity = comparer.CompareSpectra(spectrumX, spectrumY);

        //            if (double.IsNaN(spectralSimilarity) || double.IsNegativeInfinity(spectralSimilarity) || double.IsPositiveInfinity(spectralSimilarity))
        //                continue;

        //            if (spectralSimilarity < options.SimilarityCutoff)
        //                continue;

        //            var pointX      = new SpectralAnchorPoint
        //            {
        //                Net = netX,
        //                Mass = 0,
        //                Mz = xsum.PrecursorMZ,
        //                Scan = scanx,
        //                Spectrum = spectrumX
        //            };

        //            var pointY = new SpectralAnchorPoint
        //            {
        //                Net = netX,
        //                Mass = 0,
        //                Mz = ysum.PrecursorMZ,
        //                Scan = scany,
        //                Spectrum = spectrumY
        //            };

        //            var match = new SpectralAnchorPointMatch
        //            {
        //                AnchorPointX    = pointX,
        //                AnchorPointY    = pointY,
        //                SimilarityScore = spectralSimilarity,
        //                IsValidMatch    = AnchorPointMatchType.FalseMatch
        //            };

        //            matches.Add(match);
        //        }
        //    }

        //    return matches;
        //}

        /// <summary>
        /// Computes all anchor point matches between two sets of spectra.
        /// </summary>
        /// <remarks>
        /// The MS/MS scan summaries of both readers are sorted by precursor m/z and walked with a
        /// sliding window, so an X spectrum is only compared against Y spectra whose precursor m/z
        /// lies within the m/z tolerance and whose normalized scan position lies within the NET
        /// tolerance. Spectra are read lazily. To limit memory use, the peak list of a matched X
        /// spectrum is cleared once that X scan has been processed, and the peak list of a matched
        /// Y spectrum is cleared once the window has moved past it, so anchor points in the result
        /// may reference spectra whose Peaks are null.
        /// </remarks>
        /// <param name="readerX">Provider of the scan data and spectra of the first dataset.</param>
        /// <param name="readerY">Provider of the scan data and spectra of the second dataset.</param>
        /// <param name="comparer">Computes the similarity score of two spectra.</param>
        /// <param name="filter">Filter handed to SpectralUtilities.GetSpectra when a spectrum is read.</param>
        /// <param name="options">Tolerances, binning, peak-count and similarity cutoff settings.</param>
        /// <param name="skipComparison">
        /// When true no spectra are read or compared; because unread spectra are skipped,
        /// no matches are produced in that case.
        /// </param>
        /// <returns>
        /// The matches whose similarity is a finite number not below the similarity cutoff.
        /// Empty when either reader reports no scans.
        /// </returns>
        public IEnumerable <SpectralAnchorPointMatch> FindAnchorPoints(ISpectraProvider readerX,
                                                                       ISpectraProvider readerY,
                                                                       ISpectralComparer comparer,
                                                                       ISpectraFilter filter,
                                                                       SpectralOptions options,
                                                                       bool skipComparison = false)
        {
            var matches   = new List <SpectralAnchorPointMatch>();
            var scanDataX = readerX.GetScanData(0);
            var scanDataY = readerY.GetScanData(0);

            // Aggregate throws on an empty sequence; without scans there is nothing to match.
            if (!scanDataX.Any() || !scanDataY.Any())
            {
                return matches;
            }

            // Determine the scan extrema
            var maxX = scanDataX.Aggregate((l, r) => l.Value.Scan > r.Value.Scan ? l : r).Key;
            var minX = scanDataX.Aggregate((l, r) => l.Value.Scan < r.Value.Scan ? l : r).Key;
            var maxY = scanDataY.Aggregate((l, r) => l.Value.Scan > r.Value.Scan ? l : r).Key;
            var minY = scanDataY.Aggregate((l, r) => l.Value.Scan < r.Value.Scan ? l : r).Key;

            // Only MS/MS scans take part; sorting by precursor m/z enables the sliding window below.
            var ySpectraSummary = scanDataY.Values.Where(summary => summary.MsLevel == 2).ToList();
            var xSpectraSummary = scanDataX.Values.Where(summary => summary.MsLevel == 2).ToList();

            ySpectraSummary.Sort((x, y) => x.PrecursorMz.CompareTo(y.PrecursorMz));
            xSpectraSummary.Sort((x, y) => x.PrecursorMz.CompareTo(y.PrecursorMz));

            var netTolerance = options.NetTolerance;
            var mzTolerance  = options.MzTolerance;
            var j            = 0;
            var i            = 0;
            var yTotal       = ySpectraSummary.Count;
            var xTotal       = xSpectraSummary.Count;

            // Y spectra already read, keyed by scan, so each is pulled from the reader at most once
            // while it is inside the window.
            var cache   = new Dictionary <int, MSSpectra>();
            // First matched anchor point per Y scan; used to release its peaks once out of range.
            var pointsY = new Dictionary <int, SpectralAnchorPoint>();

            while (i < xTotal && j < yTotal)
            {
                var       xsum       = xSpectraSummary[i];
                var       scanx      = xsum.Scan;
                var       precursorX = xsum.PrecursorMz;
                MSSpectra spectrumX  = null;

                // Advance the window start past Y spectra that are too light to ever match again
                // (X is visited in ascending m/z order), evicting them from the cache.
                while (j < yTotal && ySpectraSummary[j].PrecursorMz < (precursorX - mzTolerance))
                {
                    var scany = ySpectraSummary[j].Scan;
                    if (cache.Remove(scany))
                    {
                        SpectralAnchorPoint stalePoint;
                        if (pointsY.TryGetValue(scany, out stalePoint) && stalePoint.Spectrum.Peaks != null)
                        {
                            stalePoint.Spectrum.Peaks.Clear();
                            stalePoint.Spectrum.Peaks = null;
                        }
                    }
                    j++;
                }

                var k      = 0;
                var points = new List <SpectralAnchorPoint>();

                // The normalized position of the X scan does not depend on the Y candidate.
                var netX = Convert.ToDouble(scanx - minX) / Convert.ToDouble(maxX - minX);

                while ((j + k) < yTotal && Math.Abs(ySpectraSummary[j + k].PrecursorMz - precursorX) < mzTolerance)
                {
                    var ysum = ySpectraSummary[j + k];
                    k++;
                    var scany = ysum.Scan;
                    var netY  = Convert.ToDouble(scany - minY) / Convert.ToDouble(maxY - minY);
                    var net   = netX - netY;

                    // Written as a negated "<" on purpose: a NaN difference (degenerate scan range)
                    // must be rejected, and NaN fails every ordered comparison.
                    if (!(Math.Abs(net) < netTolerance))
                    {
                        continue;
                    }

                    // The X spectrum is read only once a Y candidate is within range, and then only once.
                    if (spectrumX == null && !skipComparison)
                    {
                        spectrumX = SpectralUtilities.GetSpectra(options.MzBinSize,
                                                                 options.TopIonPercent,
                                                                 filter,
                                                                 readerX,
                                                                 scanx,
                                                                 options.RequiredPeakCount);

                        if (spectrumX == null)
                        {
                            // This spectrum does not have enough peaks or did not pass our filters;
                            // no Y candidate can match it, so give up on this X scan.
                            break;
                        }

                        spectrumX.PrecursorMz = xsum.PrecursorMz;
                    }

                    MSSpectra spectrumY = null;
                    if (!skipComparison && !cache.TryGetValue(scany, out spectrumY))
                    {
                        spectrumY = SpectralUtilities.GetSpectra(options.MzBinSize,
                                                                 options.TopIonPercent,
                                                                 filter,
                                                                 readerY,
                                                                 scany,
                                                                 options.RequiredPeakCount);

                        if (spectrumY == null)
                        {
                            // This spectrum does not have enough peaks or did not pass our filters.
                            continue;
                        }

                        spectrumY.PrecursorMz = ysum.PrecursorMz;
                        cache.Add(scany, spectrumY);
                    }

                    // Nothing to compare (this is always the case when skipComparison is set).
                    if (spectrumX == null || spectrumY == null)
                    {
                        continue;
                    }

                    var spectralSimilarity = comparer.CompareSpectra(spectrumX, spectrumY);

                    if (double.IsNaN(spectralSimilarity) || double.IsInfinity(spectralSimilarity))
                    {
                        continue;
                    }

                    if (spectralSimilarity < options.SimilarityCutoff)
                    {
                        continue;
                    }

                    var pointX = new SpectralAnchorPoint
                    {
                        Net      = netX,
                        Mass     = 0,
                        Mz       = xsum.PrecursorMz,
                        Scan     = scanx,
                        Spectrum = spectrumX
                    };

                    var pointY = new SpectralAnchorPoint
                    {
                        Net      = netY,
                        Mass     = 0,
                        Mz       = ysum.PrecursorMz,
                        Scan     = scany,
                        Spectrum = spectrumY
                    };

                    // Matches start out flagged as false matches.
                    matches.Add(new SpectralAnchorPointMatch
                    {
                        AnchorPointX    = pointX,
                        AnchorPointY    = pointY,
                        SimilarityScore = spectralSimilarity,
                        IsValidMatch    = AnchorPointMatchType.FalseMatch
                    });

                    points.Add(pointX);
                    if (!pointsY.ContainsKey(scany))
                    {
                        pointsY.Add(scany, pointY);
                    }
                }

                // Move to the next spectrum in the x-list and release the peaks of the one just processed.
                i++;
                foreach (var p in points)
                {
                    if (p.Spectrum.Peaks != null)
                    {
                        p.Spectrum.Peaks.Clear();
                        p.Spectrum.Peaks = null;
                    }
                }
            }

            return matches;
        }
Beispiel #26
0
        /// <summary>
        /// Aligns every pair of consecutive datasets stored in a database and writes the
        /// match and error files for each pair into an output directory.
        /// </summary>
        /// <param name="relativeDatabasePath">Database location; resolved through GetPath.</param>
        /// <param name="relativeName">Output location; resolved through GetOutputPath and created when missing.</param>
        /// <param name="name">Prefix of the generated "-matches.txt" and "-errors.txt" file names.</param>
        /// <param name="alignmentType">Alignment algorithm handed to the aligner factory.</param>
        /// <param name="clusterType">Clustering algorithm handed to the cluster factory.</param>
        public void GenerateClusterAlignmentStatistics(string relativeDatabasePath,
                                                       string relativeName,
                                                       string name,
                                                       FeatureAlignmentType alignmentType,
                                                       LcmsFeatureClusteringAlgorithmType clusterType)
        {
            var databaseLocation = GetPath(relativeDatabasePath);
            var outputDirectory  = GetOutputPath(relativeName);

            if (!Directory.Exists(outputDirectory))
            {
                Directory.CreateDirectory(outputDirectory);
            }

            // Open the existing database (the second argument is false here, unlike when creating one).
            var accessProviders = DataAccessFactory.CreateDataAccessProviders(databaseLocation, false);

            var alignmentSettings = new AlignmentOptions();
            var spectralSettings  = new SpectralOptions();
            spectralSettings.ComparerType      = SpectralComparison.CosineDotProduct;
            spectralSettings.Fdr               = 0.01;
            spectralSettings.IdScore           = 1e-9;
            spectralSettings.MzBinSize         = 0.5;
            spectralSettings.MzTolerance       = 0.5;
            spectralSettings.NetTolerance      = 0.1;
            spectralSettings.RequiredPeakCount = 32;
            spectralSettings.SimilarityCutoff  = 0.75;
            spectralSettings.TopIonPercent     = 0.8;

            // Tolerances start from the LTQ-Orbitrap preset, with the mass tolerance widened by 6.
            var preset     = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var tolerances = new FeatureTolerances();
            tolerances.Mass      = preset.Mass + 6;
            tolerances.Net       = preset.NetTolerance;
            tolerances.DriftTime = preset.DriftTimeTolerance;

            UpdateStatus("Retrieving all datasets for test.");
            var datasets = accessProviders.DatasetCache.FindAll();

            var datasetAligner = FeatureAlignerFactory.CreateDatasetAligner(alignmentType,
                                                                            alignmentSettings.LCMSWarpOptions,
                                                                            spectralSettings);
            var featureClusterer = ClusterFactory.Create(clusterType);

            var clusterParameters = new FeatureClusterParameters<UMCLight>();
            clusterParameters.Tolerances = tolerances;
            featureClusterer.Parameters  = clusterParameters;

            RegisterProgressNotifier(datasetAligner);
            RegisterProgressNotifier(featureClusterer);

            // Dataset n is the baseline for dataset n + 1, so there is one pair fewer than datasets.
            var pairCount = datasets.Count - 1;
            for (var pair = 0; pair < pairCount; pair++)
            {
                var matchFile = Path.Combine(outputDirectory, string.Format("{0}-{1}-matches.txt", name, pair));
                var errorFile = Path.Combine(outputDirectory, string.Format("{0}-{1}-errors.txt", name, pair));

                var baseline = datasets[pair];
                var alignee  = datasets[pair + 1];

                // Raw reader of the baseline dataset
                using (var baselineReader = new InformedProteomicsReader())
                {
                    baselineReader.AddDataFile(baseline.RawFile.Path, 0);

                    // Raw reader of the alignee dataset
                    using (var aligneeReader = new InformedProteomicsReader())
                    {
                        aligneeReader.AddDataFile(alignee.RawFile.Path, 0);

                        var baselineFeatures = RetrieveFeatures(baseline.DatasetId, accessProviders);
                        var aligneeFeatures  = RetrieveFeatures(alignee.DatasetId, accessProviders);

                        // Wrap each raw reader together with the features of its dataset.
                        var baselineSpectra = new CachedFeatureSpectraProvider(baselineReader, baselineFeatures);
                        var aligneeSpectra  = new CachedFeatureSpectraProvider(aligneeReader, aligneeFeatures);

                        AlignDatasets(baselineFeatures,
                                      aligneeFeatures,
                                      baselineSpectra,
                                      aligneeSpectra,
                                      datasetAligner,
                                      featureClusterer,
                                      matchFile,
                                      errorFile);
                    }
                }
            }
        }
Beispiel #27
0
        /// <summary>
        /// Finds the LC-MS features of the first dataset in a directory, links peptides to them and
        /// writes every MS feature that has at least one peptide-bearing MS/MS spectrum to a
        /// tab-separated file.
        /// </summary>
        /// <param name="directory">Directory searched (top level only) for supported dataset files.</param>
        /// <param name="matchPath">Path of the text file that is created with the resulting rows.</param>
        public void TestPeptideBands(string directory,
                                     string matchPath)
        {
            // One wildcard search pattern per supported file type.
            var searchPatterns = new List<string>();
            foreach (var fileType in DatasetLoader.SupportedFileTypes)
            {
                searchPatterns.Add("*" + fileType.Extension);
            }

            var loader   = new DatasetLoader();
            var datasets = loader.GetValidDatasets(directory, searchPatterns, SearchOption.TopDirectoryOnly);

            // Tolerances are taken unchanged from the LTQ-Orbitrap preset.
            var preset     = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var tolerances = new FeatureTolerances();
            tolerances.Mass      = preset.Mass;
            tolerances.Net       = preset.NetTolerance;
            tolerances.DriftTime = preset.DriftTimeTolerance;

            var msFilters = new MsFeatureFilteringOptions();
            msFilters.MinimumIntensity           = 5000;
            msFilters.ChargeRange                = new FilterRange(1, 6);
            msFilters.ShouldUseChargeFilter      = true;
            msFilters.ShouldUseDeisotopingFilter = true;
            msFilters.ShouldUseIntensityFilter   = true;

            var findingOptions = new LcmsFeatureFindingOptions(tolerances);
            findingOptions.MaximumNetRange  = 0.002;
            findingOptions.MaximumScanRange = 50;

            // The first dataset found serves as the baseline.
            var baseline = datasets[0];

            UpdateStatus("Loading baseline features.");
            var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baseline.Features.Path);
            msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilters);

            var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            // Only Fdr and IdScore of these options are read below.
            var peptideSettings = new SpectralOptions();
            peptideSettings.ComparerType      = SpectralComparison.CosineDotProduct;
            peptideSettings.Fdr               = 0.05;
            peptideSettings.IdScore           = 1e-9;
            peptideSettings.MzBinSize         = 0.5;
            peptideSettings.MzTolerance       = 0.5;
            peptideSettings.NetTolerance      = 0.1;
            peptideSettings.RequiredPeakCount = 32;
            peptideSettings.SimilarityCutoff  = 0.75;
            peptideSettings.TopIonPercent     = 0.8;

            var collected = new List<MSFeatureLight>();

            using (var rawReader = RawLoaderFactory.CreateFileReader(baseline.RawFile.Path))
            {
                rawReader.AddDataFile(baseline.RawFile.Path, 0);
                UpdateStatus("Creating Baseline LCMS Features.");
                var lcmsFeatures = featureFinder.FindFeatures(msFeatures,
                                                              findingOptions,
                                                              rawReader);

                LinkPeptidesToFeatures(baseline.Sequence.Path,
                                       lcmsFeatures,
                                       peptideSettings.Fdr,
                                       peptideSettings.IdScore);

                // Flatten the LC-MS features into their MS features, keep those with MS/MS, order by m/z.
                foreach (var lcmsFeature in lcmsFeatures)
                {
                    collected.AddRange(lcmsFeature.MsFeatures);
                }

                var fragmented = collected.Where(candidate => candidate.HasMsMs())
                                          .OrderBy(candidate => candidate.Mz)
                                          .ToList();

                // Keep each feature once if any of its MS/MS spectra carries at least one peptide.
                var identified = new List<MSFeatureLight>();
                foreach (var candidate in fragmented)
                {
                    var hasPeptide = false;
                    foreach (var spectrum in candidate.MSnSpectra)
                    {
                        // Entering the loop body at all means the spectrum has a peptide.
                        foreach (var unused in spectrum.Peptides)
                        {
                            hasPeptide = true;
                            break;
                        }

                        if (hasPeptide)
                        {
                            break;
                        }
                    }

                    if (hasPeptide)
                    {
                        identified.Add(candidate);
                    }
                }

                using (var output = File.CreateText(matchPath))
                {
                    output.WriteLine("Charge\tpmz\tscan\tNET\t");
                    foreach (var row in identified)
                    {
                        output.WriteLine("{0}\t{1}\t{2}\t{3}\t", row.ChargeState, row.Mz, row.Scan,
                                         row.Net);
                    }
                }
            }
        }
Beispiel #28
0
        /// <summary>
        ///     Locates the supported datasets in a directory and runs <c>PerformMultiAlignAnalysis</c>
        ///     with the first dataset as the baseline and every other dataset as an alignee.
        /// </summary>
        /// <param name="directory">Directory whose top level is searched for dataset files.</param>
        /// <param name="outputPath">
        ///     Base path for the output files: ".txt" is appended for the match file and
        ///     "-errors.txt" for the error file.
        /// </param>
        /// <param name="alignmentType">Alignment algorithm requested from <c>FeatureAlignerFactory</c>.</param>
        /// <param name="clusterType">Clustering algorithm requested from <c>ClusterFactory</c>.</param>
        public void TestClustering(
            string directory,
            string outputPath,
            FeatureAlignmentType alignmentType,
            LcmsFeatureClusteringAlgorithmType clusterType)
        {
            // Both output files are derived from the same base path.
            var matchPath = outputPath + ".txt";
            var errorPath = outputPath + "-errors.txt";

            // Turn each supported MultiAlign file type into a "*<extension>" search pattern.
            var searchPatterns = DatasetLoader.SupportedFileTypes
                                              .Select(fileType => "*" + fileType.Extension)
                                              .ToList();

            // Locate the datasets in the top level of the directory.
            var loader = new DatasetLoader();
            var datasets = loader.GetValidDatasets(directory, searchPatterns, SearchOption.TopDirectoryOnly);

            // Alignment and spectral comparison settings.
            var alignmentOptions = new AlignmentOptions();
            var spectralOptions = new SpectralOptions
            {
                ComparerType = SpectralComparison.CosineDotProduct,
                Fdr = 0.01,
                IdScore = 1e-9,
                MzBinSize = 0.5,
                MzTolerance = 0.5,
                NetTolerance = 0.1,
                RequiredPeakCount = 32,
                SimilarityCutoff = 0.75,
                TopIonPercent = 0.8
            };

            // Feature tolerances start from the LTQ-Orbitrap preset, with the mass tolerance widened by 6.
            var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var tolerances = new FeatureTolerances
            {
                Mass = preset.Mass + 6,
                Net = preset.NetTolerance,
                DriftTime = preset.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(tolerances)
            {
                MaximumNetRange = 0.002,
                MaximumScanRange = 50
            };

            // Instantiate the feature finder, aligner and clusterer.
            var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);
            var featureAligner = FeatureAlignerFactory.CreateDatasetAligner(
                alignmentType,
                alignmentOptions.LCMSWarpOptions,
                spectralOptions);
            var featureClusterer = ClusterFactory.Create(clusterType);
            featureClusterer.Parameters = new FeatureClusterParameters<UMCLight>
            {
                Tolerances = tolerances
            };

            // Hook every algorithm up to the progress reporting of this class.
            RegisterProgressNotifier(featureAligner);
            RegisterProgressNotifier(featureFinder);
            RegisterProgressNotifier(featureClusterer);

            // Filters applied to the LCMS and MS features.
            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRangeScans = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity = 5000,
                ChargeRange = new FilterRange(1, 6),
                ShouldUseChargeFilter = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter = true
            };

            // Only the first dataset is ever used as the baseline; all others are aligned to it.
            const int baselineIndex = 0;
            var aligneeDatasets = datasets.Where((dataset, index) => index != baselineIndex).ToList();

            PerformMultiAlignAnalysis(
                datasets[baselineIndex],
                aligneeDatasets,
                featureFindingOptions,
                msFilterOptions,
                lcmsFilters,
                spectralOptions,
                featureFinder,
                featureAligner,
                featureClusterer,
                matchPath,
                errorPath);
        }
Beispiel #29
0
        /// <summary>
        ///     Creates a database at the given path, finds the features of every supported dataset
        ///     in a directory, caches those features through a <c>FeatureLoader</c>, and finally adds
        ///     the datasets themselves to the dataset cache.
        /// </summary>
        /// <param name="directoryPath">Directory to search (top level only); resolved through <c>GetPath</c>.</param>
        /// <param name="databasePath">Location of the database to create; resolved through <c>GetPath</c>.</param>
        public void CreateFeatureDatabase(string directoryPath, string databasePath)
        {
            var sourceDirectory = GetPath(directoryPath);
            var resolvedDatabasePath = GetPath(databasePath);

            // One "*<extension>" search pattern per supported MultiAlign file type.
            var searchPatterns = new List<string>();
            foreach (var fileType in DatasetInformation.SupportedFileTypes)
            {
                searchPatterns.Add("*" + fileType.Extension);
            }

            // Locate the input files and wrap them as datasets.
            var inputFiles = DatasetSearcher.FindDatasets(
                sourceDirectory,
                searchPatterns,
                SearchOption.TopDirectoryOnly);
            var datasets = DatasetInformation.ConvertInputFilesIntoDatasets(inputFiles);

            // Feature tolerances start from the LTQ-Orbitrap preset, with the mass tolerance widened by 6.
            var preset = InstrumentPresetFactory.Create(InstrumentPresets.LtqOrbitrap);
            var tolerances = new FeatureTolerances
            {
                Mass      = preset.Mass + 6,
                Net       = preset.NetTolerance,
                DriftTime = preset.DriftTimeTolerance
            };
            var featureFindingOptions = new LcmsFeatureFindingOptions(tolerances)
            {
                MaximumNetRange  = 0.002,
                MaximumScanRange = 50
            };

            // Filters applied to the LCMS and MS features.
            var lcmsFilters = new LcmsFeatureFilteringOptions
            {
                FeatureLengthRange = new FilterRange(50, 300)
            };
            var msFilterOptions = new MsFeatureFilteringOptions
            {
                MinimumIntensity           = 5000,
                ChargeRange                = new FilterRange(1, 6),
                ShouldUseChargeFilter      = true,
                ShouldUseDeisotopingFilter = true,
                ShouldUseIntensityFilter   = true
            };

            // Spectral comparison settings handed to FindFeatures.
            var spectralOptions = new SpectralOptions
            {
                ComparerType      = SpectralComparison.CosineDotProduct,
                Fdr               = 0.01,
                IdScore           = 1e-9,
                MzBinSize         = 0.5,
                MzTolerance       = 0.5,
                NetTolerance      = 0.1,
                RequiredPeakCount = 32,
                SimilarityCutoff  = 0.75,
                TopIonPercent     = 0.8
            };

            var featureFinder = FeatureFinderFactory.CreateFeatureFinder(FeatureFinderType.TreeBased);

            // Create the database, then the providers and loader used to write into it.
            NHibernateUtil.CreateDatabase(resolvedDatabasePath);
            var providers = DataAccessFactory.CreateDataAccessProviders(resolvedDatabasePath, true);
            var featureCache = new FeatureLoader
            {
                Providers = providers
            };

            // Number the datasets sequentially from zero and cache the features of each one.
            var nextDatasetId = 0;
            foreach (var dataset in datasets)
            {
                dataset.DatasetId = nextDatasetId;
                nextDatasetId++;

                var features = FindFeatures(
                    dataset,
                    featureFindingOptions,
                    msFilterOptions,
                    lcmsFilters,
                    spectralOptions,
                    featureFinder);

                featureCache.CacheFeatures(features);
            }

            providers.DatasetCache.AddAll(datasets);
        }
        /// <summary>
        ///     Runs the MultiAlign analysis: finds LCMS features in the baseline dataset, then for each
        ///     alignee dataset finds its features, clusters the combined feature set, aligns the alignee
        ///     to the baseline, clusters again, and reports the pre/post alignment cluster statistics
        ///     through <c>UpdateStatus</c>.
        /// </summary>
        /// <param name="baselineDataset">Dataset whose features act as the alignment reference.</param>
        /// <param name="aligneeDatasets">Datasets aligned to the baseline, processed one at a time.</param>
        /// <param name="featureFindingOptions">Options passed to <paramref name="featureFinder" />.</param>
        /// <param name="msFilterOptions">Filter applied to the MS features right after they are loaded.</param>
        /// <param name="lcmsFilterOptions">Not read by this method; kept so existing callers still compile.</param>
        /// <param name="peptideOptions">Supplies the FDR and ID score used when linking peptides to features.</param>
        /// <param name="featureFinder">Algorithm that builds LCMS features from MS features.</param>
        /// <param name="aligner">Algorithm that aligns the alignee features to the baseline features.</param>
        /// <param name="clusterer">Algorithm that clusters the combined baseline and alignee features.</param>
        /// <param name="matchPath">Path handed to <c>SaveMatches</c> for every alignee dataset.</param>
        /// <param name="errorPath">Path handed to <c>WriteErrors</c> for every alignee dataset.</param>
        /// <remarks>
        ///     The progress notifiers of the aligner, feature finder and clusterer are de-registered
        ///     when the method exits, whether it completes normally or throws.
        /// </remarks>
        public void PerformMultiAlignAnalysis(DatasetInformation baselineDataset,
            IEnumerable<DatasetInformation> aligneeDatasets,
            LcmsFeatureFindingOptions featureFindingOptions,
            MsFeatureFilteringOptions msFilterOptions,
            LcmsFeatureFilteringOptions lcmsFilterOptions,
            SpectralOptions peptideOptions,
            IFeatureFinder featureFinder,
            IFeatureAligner<IEnumerable<UMCLight>,
            IEnumerable<UMCLight>,
            classAlignmentData> aligner,
            IClusterer<UMCLight, UMCClusterLight> clusterer,
            string matchPath,
            string errorPath)
        {
            try
            {
                UpdateStatus("Loading baseline features.");
                var msFeatures = UmcLoaderFactory.LoadMsFeatureData(baselineDataset.Features.Path);
                msFeatures = LcmsFeatureFilters.FilterMsFeatures(msFeatures, msFilterOptions);

                // Load the baseline reference set
                using (var rawProviderX = RawLoaderFactory.CreateFileReader(baselineDataset.RawPath))
                {
                    rawProviderX.AddDataFile(baselineDataset.RawPath, 0);
                    UpdateStatus("Creating Baseline LCMS Features.");
                    var baselineFeatures = featureFinder.FindFeatures(msFeatures,
                        featureFindingOptions,
                        rawProviderX);
                    LinkPeptidesToFeatures(baselineDataset.SequencePath, baselineFeatures, peptideOptions.Fdr,
                        peptideOptions.IdScore);

                    var providerX = new CachedFeatureSpectraProvider(rawProviderX, baselineFeatures);

                    // Then load the alignee dataset
                    foreach (var dataset in aligneeDatasets)
                    {
                        var aligneeMsFeatures = UmcLoaderFactory.LoadMsFeatureData(dataset.Features.Path);
                        aligneeMsFeatures = LcmsFeatureFilters.FilterMsFeatures(aligneeMsFeatures, msFilterOptions);
                        using (var rawProviderY = RawLoaderFactory.CreateFileReader(dataset.RawPath))
                        {
                            rawProviderY.AddDataFile(dataset.RawPath, 0);

                            UpdateStatus("Finding alignee features");
                            var aligneeFeatures = featureFinder.FindFeatures(aligneeMsFeatures,
                                featureFindingOptions,
                                rawProviderY);
                            LinkPeptidesToFeatures(dataset.SequencePath, aligneeFeatures, peptideOptions.Fdr,
                                peptideOptions.IdScore);

                            var providerY = new CachedFeatureSpectraProvider(rawProviderY, aligneeFeatures);

                            // cluster before we do anything else....
                            var allFeatures = new List<UMCLight>();
                            allFeatures.AddRange(baselineFeatures);
                            allFeatures.AddRange(aligneeFeatures);
                            foreach (var feature in allFeatures)
                            {
                                // NOTE(review): self-assignment. It looks like a leftover from a rename and
                                // may have been meant to seed an aligned NET value, the way the next line
                                // seeds the aligned mass. Left as-is until the intent is confirmed.
                                feature.Net = feature.Net;
                                feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
                            }

                            // This tells us the differences before we align.
                            var clusters = clusterer.Cluster(allFeatures);
                            var preAlignment = AnalyzeClusters(clusters);

                            aligner.AligneeSpectraProvider = providerY;
                            aligner.BaselineSpectraProvider = providerX;

                            UpdateStatus("Aligning data");
                            // Aligner data
                            var data = aligner.Align(baselineFeatures, aligneeFeatures);
                            var matches = data.Matches;

                            WriteErrors(errorPath, matches);

                            // create anchor points for LCMSWarp alignment
                            // NOTE(review): both lists are filled below but never read afterwards in this
                            // method. Confirm whether a warp step is missing or this can be removed.
                            var massPoints = new List<RegressionPoint>();
                            var netPoints = new List<RegressionPoint>();
                            foreach (var match in matches)
                            {
                                var massError = FeatureLight.ComputeMassPPMDifference(match.AnchorPointX.Mz,
                                    match.AnchorPointY.Mz);
                                var netError = match.AnchorPointX.Net - match.AnchorPointY.Net;
                                var massPoint = new RegressionPoint(match.AnchorPointX.Mz, 0, massError, netError);
                                massPoints.Add(massPoint);

                                var netPoint = new RegressionPoint(match.AnchorPointX.Net, 0, massError, netError);
                                netPoints.Add(netPoint);
                            }

                            // Clear the cluster assignments from the first clustering pass so the
                            // second pass starts from unassigned features.
                            foreach (var feature in allFeatures)
                            {
                                feature.UmcCluster = null;
                                feature.ClusterId = -1;
                            }
                            // Then cluster after alignment!
                            UpdateStatus("clustering data");
                            clusters = clusterer.Cluster(allFeatures);
                            var postAlignment = AnalyzeClusters(clusters);

                            UpdateStatus("Note\tSame\tDifferent");
                            UpdateStatus(string.Format("Pre\t{0}\t{1}", preAlignment.SameCluster,
                                preAlignment.DifferentCluster));
                            UpdateStatus(string.Format("Post\t{0}\t{1}", postAlignment.SameCluster,
                                postAlignment.DifferentCluster));

                            SaveMatches(matchPath, matches);
                        }
                    }
                }
            }
            finally
            {
                // Always detach the progress notifiers, even when the analysis fails part-way,
                // so the algorithm instances do not stay subscribed.
                DeRegisterProgressNotifier(aligner);
                DeRegisterProgressNotifier(featureFinder);
                DeRegisterProgressNotifier(clusterer);
            }
        }