/// <summary>
/// Counts the peptide pairs (one per dataset) that have equal sequences and lie within the
/// configured normalized-scan and m/z tolerances, then writes the supplied names and the
/// resulting count to the console.
/// </summary>
/// <param name="datasetX">Dataset whose <c>PeptideFile</c> is read for the first peptide list.</param>
/// <param name="datasetY">Dataset whose <c>PeptideFile</c> is read for the second peptide list.</param>
/// <param name="scanDataX">Scan summaries of the first dataset; used only to find the scan extrema.</param>
/// <param name="scanDataY">Scan summaries of the second dataset; used only to find the scan extrema.</param>
/// <param name="names">Labels printed, one per line, before the match count.</param>
/// <param name="options">Supplies <c>IdScore</c>, <c>Fdr</c>, <c>NetTolerance</c> and <c>MzTolerance</c>.</param>
/// <remarks>
/// <c>Aggregate</c> throws <see cref="InvalidOperationException"/> when either scan dictionary is empty.
/// When the minimum and maximum keys of a dataset coincide, the normalization divides by zero;
/// the resulting NaN/Infinity never satisfies the tolerance test, so nothing is counted.
/// </remarks>
private void MatchPeptides(AlignmentDataset datasetX, AlignmentDataset datasetY, Dictionary<int, ScanSummary> scanDataX, Dictionary<int, ScanSummary> scanDataY, IEnumerable<string> names, SpectralOptions options)
{
    // Read data for peptides
    var reader = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
    var peptidesA = reader.Read(datasetX.PeptideFile);
    var peptidesB = reader.Read(datasetY.PeptideFile);

    // Keep only identifications that pass the score/FDR cutoff (a single materialization is enough).
    peptidesA = peptidesA.Where(x => PeptideUtility.PassesCutoff(x, options.IdScore, options.Fdr)).ToList();
    peptidesB = peptidesB.Where(x => PeptideUtility.PassesCutoff(x, options.IdScore, options.Fdr)).ToList();

    var peptideMapX = PeptideUtility.MapWithBestScan(peptidesA);
    var peptideMapY = PeptideUtility.MapWithBestScan(peptidesB);

    // Determine the scan extrema. Note that the dictionary KEY of the entry holding the
    // largest/smallest Value.Scan is taken, not the Scan value itself.
    var maxX = scanDataX.Aggregate((left, right) => left.Value.Scan > right.Value.Scan ? left : right).Key;
    var minX = scanDataX.Aggregate((left, right) => left.Value.Scan < right.Value.Scan ? left : right).Key;
    var maxY = scanDataY.Aggregate((left, right) => left.Value.Scan > right.Value.Scan ? left : right).Key;
    var minY = scanDataY.Aggregate((left, right) => left.Value.Scan < right.Value.Scan ? left : right).Key;

    // The normalization denominators never change, so compute them once instead of per pair.
    var rangeX = Convert.ToDouble(maxX - minX);
    var rangeY = Convert.ToDouble(maxY - minY);

    // Likewise, the normalized position of every Y peptide is independent of X: compute it once.
    var candidatesY = peptideMapY.Keys
        .Select(scanY => new
        {
            Net = Convert.ToDouble(scanY - minY) / rangeY,
            Peptide = peptideMapY[scanY]
        })
        .ToList();

    // Then map the peptide sequences to identify True Positive and False Positives
    var count = 0;
    foreach (var scanX in peptideMapX.Keys)
    {
        var peptideX = peptideMapX[scanX];
        var netX = Convert.ToDouble(scanX - minX) / rangeX;

        foreach (var candidate in candidatesY)
        {
            var peptideY = candidate.Peptide;

            if (!(Math.Abs(netX - candidate.Net) < options.NetTolerance))
            {
                continue;
            }

            if (!(Math.Abs(peptideX.Mz - peptideY.Mz) < options.MzTolerance))
            {
                continue;
            }

            // NOTE(review): both lists were already filtered with PassesCutoff above, so this
            // re-check looks redundant; kept in case MapWithBestScan can return other peptides.
            if (PeptideUtility.PassesCutoff(peptideX, options.IdScore, options.Fdr) &&
                PeptideUtility.PassesCutoff(peptideY, options.IdScore, options.Fdr) &&
                peptideX.Sequence.Equals(peptideY.Sequence))
            {
                count++;
            }
        }
    }

    Console.WriteLine();
    foreach (var name in names)
    {
        Console.WriteLine(name);
    }
    Console.WriteLine(@"Matches - {0}", count);
}
/// <summary>
/// Finds anchor points between the spectra of two providers, hands them to
/// <c>SpectralAnchorPointValidator.ValidateMatches</c> together with the peptides read from both
/// datasets, and returns the result bundled with the dataset names and options.
/// </summary>
/// <param name="comparerType">Selects the spectral comparer created by the factory.</param>
/// <param name="readerX">Spectra provider for the first dataset.</param>
/// <param name="readerY">Spectra provider for the second dataset.</param>
/// <param name="options">Options forwarded to the anchor point finder and the validator.</param>
/// <param name="datasetX">First dataset; its <c>PeptideFile</c> is read with the MSGF reader.</param>
/// <param name="datasetY">Second dataset; its <c>PeptideFile</c> is read with the MSGF reader.</param>
/// <param name="names">Dataset names stored on the returned analysis.</param>
/// <returns>A <c>SpectralAnalysis</c> holding the names, the anchor point matches and the options.</returns>
protected static SpectralAnalysis MatchDatasets(SpectralComparison comparerType, ISpectraProvider readerX, ISpectraProvider readerY, SpectralOptions options, AlignmentDataset datasetX, AlignmentDataset datasetY, List<string> names)
{
    // Collaborators are created up front, in the same order as before.
    var sequenceReader = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
    var anchorFinder = new SpectralAnchorPointFinder();
    var anchorValidator = new SpectralAnchorPointValidator();
    var spectraComparer = SpectralComparerFactory.CreateSpectraComparer(comparerType);
    var spectraFilter = SpectrumFilterFactory.CreateFilter(SpectraFilters.TopPercent);

    // Anchor points are located first; the peptide files are only read afterwards.
    var anchorPoints = anchorFinder.FindAnchorPoints(readerX, readerY, spectraComparer, spectraFilter, options);

    anchorValidator.ValidateMatches(
        anchorPoints,
        sequenceReader.Read(datasetX.PeptideFile),
        sequenceReader.Read(datasetY.PeptideFile),
        options);

    return new SpectralAnalysis
    {
        DatasetNames = names,
        Matches = anchorPoints,
        Options = options
    };
}
/// <summary>
/// Compares every unordered pair of <c>*.mscache</c> files found in <paramref name="directory"/>:
/// for each pair the datasets are matched with <c>MatchDatasets</c> and the analysis is passed to
/// <c>AlignMatches</c> together with a freshly created report writer.
/// </summary>
/// <param name="directory">Directory searched for <c>*.mscache</c> files.</param>
/// <param name="comparerType">Spectral comparer type; stored in the options and passed to <c>MatchDatasets</c>.</param>
/// <param name="mzBinSize">Value assigned to <c>SpectralOptions.MzBinSize</c>.</param>
/// <param name="mzTolerance">Value assigned to <c>SpectralOptions.MzTolerance</c>.</param>
/// <param name="netTolerance">Value assigned to <c>SpectralOptions.NetTolerance</c>.</param>
/// <param name="similarityScoreCutoff">Value assigned to <c>SpectralOptions.SimilarityCutoff</c>.</param>
/// <param name="peptideScore">Value assigned to <c>SpectralOptions.IdScore</c>.</param>
/// <param name="peptideFdr">Value assigned to <c>SpectralOptions.Fdr</c>.</param>
/// <param name="ionPercent">Value assigned to <c>SpectralOptions.TopIonPercent</c>.</param>
/// <param name="numberOfRequiredPeaks">Value assigned to <c>SpectralOptions.RequiredPeakCount</c>.</param>
/// <param name="name">Prefix of each report name; the running comparison index is appended.</param>
public void GenerateFigure4_MetaMatches(string directory, SpectralComparison comparerType, double mzBinSize, double mzTolerance, double netTolerance, double similarityScoreCutoff, double peptideScore, double peptideFdr, double ionPercent, int numberOfRequiredPeaks, string name)
{
    // NOTE(review): the output location is hard-coded and set on a static factory property.
    AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

    Console.WriteLine(@"Post-Pre Tests For {0}", directory);
    var cacheFiles = Directory.GetFiles(directory, "*.mscache");

    Console.WriteLine(@"Building data cache");
    // Only Cache is assigned here, so Msgf keeps whatever default PathCache gives it.
    var data = cacheFiles.Select(path => new FigureBase.PathCache { Cache = path }).ToList();

    // The options for the analysis
    var options = new SpectralOptions
    {
        MzBinSize = mzBinSize,
        MzTolerance = mzTolerance,
        NetTolerance = netTolerance,
        SimilarityCutoff = similarityScoreCutoff,
        TopIonPercent = ionPercent,
        IdScore = peptideScore,
        ComparerType = comparerType,
        Fdr = peptideFdr,
        RequiredPeakCount = numberOfRequiredPeaks
    };

    var comparison = 0;
    for (var i = 0; i < data.Count; i++)
    {
        var cachex = data[i];

        // Get the raw path stored in the cache file...
        // then get the dataset object
        var rawPathX = ScanSummaryCache.ReadPath(cachex.Cache);
        var datasetX = new AlignmentDataset(rawPathX, "", cachex.Msgf);

        // create a raw file reader for the datasets
        using (var readerX = new InformedProteomicsReader())
        {
            // wrap it in the cached object so we can load scan meta-data
            var cacheReaderX = new RawLoaderCache(readerX);
            var cacheDataX = ScanSummaryCache.ReadCache(cachex.Cache);
            readerX.AddDataFile(rawPathX, 0);
            cacheReaderX.AddCache(0, cacheDataX);

            for (var j = i + 1; j < data.Count; j++)
            {
                var cachey = data[j];

                // Get the raw path stored in the cache file...
                // then get the dataset object
                var rawPathY = ScanSummaryCache.ReadPath(cachey.Cache);
                var datasetY = new AlignmentDataset(rawPathY, "", cachey.Msgf);

                // create a raw file reader for the datasets
                using (var readerY = new InformedProteomicsReader())
                {
                    // Then the writer for creating a report
                    // NOTE(review): the figure type is Figure3 although this is the figure 4 routine - confirm.
                    var writer = AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3, name + comparison);
                    comparison++;

                    try
                    {
                        // wrap it in the cached object so we can load scan meta-data
                        var cacheReaderY = new RawLoaderCache(readerY);
                        var cacheDataY = ScanSummaryCache.ReadCache(cachey.Cache);
                        cacheReaderY.AddCache(0, cacheDataY);
                        readerY.AddDataFile(rawPathY, 0);

                        var names = new List<string> { data[i].Cache, data[j].Cache };

                        // NOTE(review): the raw readers are passed here, not cacheReaderX/cacheReaderY,
                        // whereas GenerateFigure3_Matches passes the cache wrappers - confirm this is intended.
                        var analysis = MatchDatasets(comparerType, readerX, readerY, options, datasetX, datasetY, names);
                        AlignMatches(analysis, writer);
                    }
                    finally
                    {
                        // Close the report even when matching or alignment throws, so the writer is not leaked.
                        writer.Close();
                    }
                }
            }
        }
    }
}
/// <summary>
/// Pairs each <c>*_msgfdb_fht.txt</c> file in <paramref name="directory"/> with the matching
/// <c>*.mscache</c> file, then compares every unordered pair of those datasets: the pair is matched
/// with <c>MatchDatasets</c> and the analysis is passed to <c>AlignMatches</c> with a per-pair report writer.
/// </summary>
/// <param name="directory">Directory searched for cache and MSGF result files.</param>
/// <param name="comparerType">Spectral comparer type; stored in the options and passed to <c>MatchDatasets</c>.</param>
/// <param name="mzBinSize">Value assigned to <c>SpectralOptions.MzBinSize</c>.</param>
/// <param name="mzTolerance">Value assigned to <c>SpectralOptions.MzTolerance</c>.</param>
/// <param name="netTolerance">Value assigned to <c>SpectralOptions.NetTolerance</c>.</param>
/// <param name="similarityScoreCutoff">Value assigned to <c>SpectralOptions.SimilarityCutoff</c>.</param>
/// <param name="peptideScore">Value assigned to <c>SpectralOptions.IdScore</c>.</param>
/// <param name="peptideFdr">Value assigned to <c>SpectralOptions.Fdr</c>.</param>
/// <param name="ionPercent">Value assigned to <c>SpectralOptions.TopIonPercent</c>.</param>
/// <param name="numberOfRequiredPeaks">Value assigned to <c>SpectralOptions.RequiredPeakCount</c>.</param>
public void GenerateFigure3_Matches(string directory, SpectralComparison comparerType, double mzBinSize, double mzTolerance, double netTolerance, double similarityScoreCutoff, double peptideScore, double peptideFdr, double ionPercent, int numberOfRequiredPeaks)
{
    // NOTE(review): the base path says "figure4" although this is the figure 3 routine - confirm.
    AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

    Console.WriteLine(@"Post-Pre Tests For {0}", directory);
    var cacheFiles = Directory.GetFiles(directory, "*.mscache");
    var msgfFiles = Directory.GetFiles(directory, "*_msgfdb_fht.txt");

    Console.WriteLine(@"Building data cache");

    // Lower-cased cache paths, used purely as a membership set (all values are null).
    var map = cacheFiles.ToDictionary<string, string, FigureBase.PathCache>(path => path.ToLower(), path => null);

    // For every MSGF file derive the expected cache path and keep it when that cache exists.
    // NOTE(review): "name" is derived from a path returned by Directory.GetFiles, so it presumably
    // already contains the directory; Path.Combine then only works as intended for rooted paths.
    // The stored Cache/Features paths are the lower-cased form, not the on-disk casing.
    var data = (from path in msgfFiles
                let name = path.ToLower().Replace("_msgfdb_fht.txt", ".mscache")
                let newName = Path.Combine(directory, name)
                let features = Path.Combine(directory, name)
                where map.ContainsKey(newName)
                select new FigureBase.PathCache
                {
                    Cache = newName,
                    Msgf = path,
                    Features = features
                }).ToList();

    // The options for the analysis
    var options = new SpectralOptions
    {
        MzBinSize = mzBinSize,
        MzTolerance = mzTolerance,
        NetTolerance = netTolerance,
        SimilarityCutoff = similarityScoreCutoff,
        TopIonPercent = ionPercent,
        IdScore = peptideScore,
        ComparerType = comparerType,
        Fdr = peptideFdr,
        RequiredPeakCount = numberOfRequiredPeaks
    };

    Console.WriteLine(@"{0}", data.Count);

    var comparison = 0;
    for (var i = 0; i < data.Count; i++)
    {
        var cachex = data[i];

        // Get the raw path stored in the cache file...
        // then get the dataset object
        var rawPathX = ScanSummaryCache.ReadPath(cachex.Cache);
        var datasetX = new AlignmentDataset(rawPathX, "", cachex.Msgf);

        // create a raw file reader for the datasets
        using (var readerX = RawLoaderFactory.CreateFileReader(datasetX.RawFile))
        {
            // wrap it in the cached object so we can load scan meta-data
            var cacheReaderX = new RawLoaderCache(readerX);
            var cacheDataX = ScanSummaryCache.ReadCache(cachex.Cache);
            readerX.AddDataFile(rawPathX, 0);
            cacheReaderX.AddCache(0, cacheDataX);

            for (var j = i + 1; j < data.Count; j++)
            {
                // Then the writer for creating a report
                var writer = AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3, "results-figure3-largeScale" + comparison);
                comparison++;

                try
                {
                    var cachey = data[j];

                    // Get the raw path stored in the cache file...
                    // then get the dataset object
                    var rawPathY = ScanSummaryCache.ReadPath(cachey.Cache);
                    var datasetY = new AlignmentDataset(rawPathY, "", cachey.Msgf);

                    // create a raw file reader for the datasets
                    using (var readerY = RawLoaderFactory.CreateFileReader(datasetY.RawFile))
                    {
                        // wrap it in the cached object so we can load scan meta-data
                        var cacheReaderY = new RawLoaderCache(readerY);
                        var cacheDataY = ScanSummaryCache.ReadCache(cachey.Cache);
                        cacheReaderY.AddCache(0, cacheDataY);
                        readerY.AddDataFile(rawPathY, 0);

                        var names = new List<string> { data[i].Cache, data[j].Cache };

                        // Write the results
                        var analysis = MatchDatasets(comparerType, cacheReaderX, cacheReaderY, options, datasetX, datasetY, names);
                        AlignMatches(analysis, writer);
                    }
                }
                finally
                {
                    // The writer was previously never closed; close it per pair (as
                    // GenerateFigure4_MetaMatches does), also when an exception is thrown.
                    writer.Close();
                }
            }
        }
    }
}
/// <summary>
/// Builds a <c>SpectralAnalysis</c> for two datasets: anchor points are located between the two
/// spectra providers and then passed, with the MSGF peptides of each dataset, to the anchor point validator.
/// </summary>
/// <param name="comparerType">Kind of spectral comparer requested from <c>SpectralComparerFactory</c>.</param>
/// <param name="readerX">Source of spectra for the first dataset.</param>
/// <param name="readerY">Source of spectra for the second dataset.</param>
/// <param name="options">Analysis options; forwarded unchanged and stored on the result.</param>
/// <param name="datasetX">Provides the peptide file path of the first dataset.</param>
/// <param name="datasetY">Provides the peptide file path of the second dataset.</param>
/// <param name="names">Names recorded as <c>DatasetNames</c> on the result.</param>
/// <returns>The analysis containing names, matches and options.</returns>
protected static SpectralAnalysis MatchDatasets(SpectralComparison comparerType, ISpectraProvider readerX, ISpectraProvider readerY, SpectralOptions options, AlignmentDataset datasetX, AlignmentDataset datasetY, List<string> names)
{
    var msgfReader = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
    var pointFinder = new SpectralAnchorPointFinder();
    var pointValidator = new SpectralAnchorPointValidator();

    // A top-percent spectrum filter is always used; only the comparer is configurable.
    var similarity = SpectralComparerFactory.CreateSpectraComparer(comparerType);
    var topPercentFilter = SpectrumFilterFactory.CreateFilter(SpectraFilters.TopPercent);

    var result = new SpectralAnalysis
    {
        DatasetNames = names,
        Matches = pointFinder.FindAnchorPoints(readerX, readerY, similarity, topPercentFilter, options),
        Options = options
    };

    // Peptides are read only after the anchor points have been found.
    var identificationsX = msgfReader.Read(datasetX.PeptideFile);
    var identificationsY = msgfReader.Read(datasetY.PeptideFile);
    pointValidator.ValidateMatches(result.Matches, identificationsX, identificationsY, options);

    return result;
}
/// <summary>
/// Runs the large-scale pairwise comparison: every <c>*_msgfdb_fht.txt</c> file in
/// <paramref name="directory"/> that has a corresponding <c>*.mscache</c> file becomes a dataset,
/// and each unordered pair of datasets is matched (<c>MatchDatasets</c>) and aligned
/// (<c>AlignMatches</c>) into its own report.
/// </summary>
/// <param name="directory">Directory searched for cache and MSGF result files.</param>
/// <param name="comparerType">Spectral comparer type; stored in the options and passed to <c>MatchDatasets</c>.</param>
/// <param name="mzBinSize">Value assigned to <c>SpectralOptions.MzBinSize</c>.</param>
/// <param name="mzTolerance">Value assigned to <c>SpectralOptions.MzTolerance</c>.</param>
/// <param name="netTolerance">Value assigned to <c>SpectralOptions.NetTolerance</c>.</param>
/// <param name="similarityScoreCutoff">Value assigned to <c>SpectralOptions.SimilarityCutoff</c>.</param>
/// <param name="peptideScore">Value assigned to <c>SpectralOptions.IdScore</c>.</param>
/// <param name="peptideFdr">Value assigned to <c>SpectralOptions.Fdr</c>.</param>
/// <param name="ionPercent">Value assigned to <c>SpectralOptions.TopIonPercent</c>.</param>
/// <param name="numberOfRequiredPeaks">Value assigned to <c>SpectralOptions.RequiredPeakCount</c>.</param>
public void GenerateFigure3_Matches(string directory, SpectralComparison comparerType, double mzBinSize, double mzTolerance, double netTolerance, double similarityScoreCutoff, double peptideScore, double peptideFdr, double ionPercent, int numberOfRequiredPeaks)
{
    // NOTE(review): the base path says "figure4" although this is the figure 3 routine - confirm.
    AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

    Console.WriteLine(@"Post-Pre Tests For {0}", directory);
    var cacheFiles = Directory.GetFiles(directory, "*.mscache");
    var msgfFiles = Directory.GetFiles(directory, "*_msgfdb_fht.txt");

    Console.WriteLine(@"Building data cache");

    // Lower-cased cache paths, used purely as a membership set (all values are null).
    var map = cacheFiles.ToDictionary<string, string, FigureBase.PathCache>(path => path.ToLower(), path => null);

    // For every MSGF file derive the expected cache path and keep it when that cache exists.
    // NOTE(review): "name" comes from a Directory.GetFiles result, so it presumably already
    // includes the directory; the stored Cache/Features paths are lower-cased, not on-disk casing.
    var data = (from path in msgfFiles
                let name = path.ToLower().Replace("_msgfdb_fht.txt", ".mscache")
                let newName = Path.Combine(directory, name)
                let features = Path.Combine(directory, name)
                where map.ContainsKey(newName)
                select new FigureBase.PathCache
                {
                    Cache = newName,
                    Msgf = path,
                    Features = features
                }).ToList();

    // The options for the analysis
    var options = new SpectralOptions
    {
        MzBinSize = mzBinSize,
        MzTolerance = mzTolerance,
        NetTolerance = netTolerance,
        SimilarityCutoff = similarityScoreCutoff,
        TopIonPercent = ionPercent,
        IdScore = peptideScore,
        ComparerType = comparerType,
        Fdr = peptideFdr,
        RequiredPeakCount = numberOfRequiredPeaks
    };

    Console.WriteLine(@"{0}", data.Count);

    var comparison = 0;
    for (var i = 0; i < data.Count; i++)
    {
        var cachex = data[i];

        // Get the raw path stored in the cache file...
        // then get the dataset object
        var rawPathX = ScanSummaryCache.ReadPath(cachex.Cache);
        var datasetX = new AlignmentDataset(rawPathX, "", cachex.Msgf);

        // create a raw file reader for the datasets
        using (var readerX = new InformedProteomicsReader())
        {
            // wrap it in the cached object so we can load scan meta-data
            var cacheReaderX = new RawLoaderCache(readerX);
            var cacheDataX = ScanSummaryCache.ReadCache(cachex.Cache);
            readerX.AddDataFile(rawPathX, 0);
            cacheReaderX.AddCache(0, cacheDataX);

            for (var j = i + 1; j < data.Count; j++)
            {
                // Then the writer for creating a report
                var writer = AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3, "results-figure3-largeScale" + comparison);
                comparison++;

                try
                {
                    var cachey = data[j];

                    // Get the raw path stored in the cache file...
                    // then get the dataset object
                    var rawPathY = ScanSummaryCache.ReadPath(cachey.Cache);
                    var datasetY = new AlignmentDataset(rawPathY, "", cachey.Msgf);

                    // create a raw file reader for the datasets
                    using (var readerY = new InformedProteomicsReader())
                    {
                        // wrap it in the cached object so we can load scan meta-data
                        var cacheReaderY = new RawLoaderCache(readerY);
                        var cacheDataY = ScanSummaryCache.ReadCache(cachey.Cache);
                        cacheReaderY.AddCache(0, cacheDataY);
                        readerY.AddDataFile(rawPathY, 0);

                        var names = new List<string> { data[i].Cache, data[j].Cache };

                        // Write the results
                        var analysis = MatchDatasets(comparerType, cacheReaderX, cacheReaderY, options, datasetX, datasetY, names);
                        AlignMatches(analysis, writer);
                    }
                }
                finally
                {
                    // The writer was previously never closed; close it per pair (as
                    // GenerateFigure4_MetaMatches does), also when an exception is thrown.
                    writer.Close();
                }
            }
        }
    }
}
/// <summary>
/// Enumerates the <c>*.mscache</c> files in <paramref name="directory"/> and, for every unordered
/// pair, matches the two datasets with <c>MatchDatasets</c> and passes the analysis to
/// <c>AlignMatches</c> along with a report writer named <paramref name="name"/> plus a running index.
/// </summary>
/// <param name="directory">Directory searched for <c>*.mscache</c> files.</param>
/// <param name="comparerType">Spectral comparer type; stored in the options and passed to <c>MatchDatasets</c>.</param>
/// <param name="mzBinSize">Value assigned to <c>SpectralOptions.MzBinSize</c>.</param>
/// <param name="mzTolerance">Value assigned to <c>SpectralOptions.MzTolerance</c>.</param>
/// <param name="netTolerance">Value assigned to <c>SpectralOptions.NetTolerance</c>.</param>
/// <param name="similarityScoreCutoff">Value assigned to <c>SpectralOptions.SimilarityCutoff</c>.</param>
/// <param name="peptideScore">Value assigned to <c>SpectralOptions.IdScore</c>.</param>
/// <param name="peptideFdr">Value assigned to <c>SpectralOptions.Fdr</c>.</param>
/// <param name="ionPercent">Value assigned to <c>SpectralOptions.TopIonPercent</c>.</param>
/// <param name="numberOfRequiredPeaks">Value assigned to <c>SpectralOptions.RequiredPeakCount</c>.</param>
/// <param name="name">Prefix of each report name; the running comparison index is appended.</param>
public void GenerateFigure4_MetaMatches(string directory, SpectralComparison comparerType, double mzBinSize, double mzTolerance, double netTolerance, double similarityScoreCutoff, double peptideScore, double peptideFdr, double ionPercent, int numberOfRequiredPeaks, string name)
{
    // NOTE(review): the output location is hard-coded and set on a static factory property.
    AlignmentAnalysisWriterFactory.BasePath = @"M:\doc\papers\paperAlignment\Data\figure4";

    Console.WriteLine(@"Post-Pre Tests For {0}", directory);
    var cacheFiles = Directory.GetFiles(directory, "*.mscache");

    Console.WriteLine(@"Building data cache");
    // Only Cache is assigned here, so Msgf keeps whatever default PathCache gives it.
    var data = cacheFiles.Select(path => new FigureBase.PathCache { Cache = path }).ToList();

    // The options for the analysis
    var options = new SpectralOptions
    {
        MzBinSize = mzBinSize,
        MzTolerance = mzTolerance,
        NetTolerance = netTolerance,
        SimilarityCutoff = similarityScoreCutoff,
        TopIonPercent = ionPercent,
        IdScore = peptideScore,
        ComparerType = comparerType,
        Fdr = peptideFdr,
        RequiredPeakCount = numberOfRequiredPeaks
    };

    var comparison = 0;
    for (var i = 0; i < data.Count; i++)
    {
        var cachex = data[i];

        // Get the raw path stored in the cache file...
        // then get the dataset object
        var rawPathX = ScanSummaryCache.ReadPath(cachex.Cache);
        var datasetX = new AlignmentDataset(rawPathX, "", cachex.Msgf);

        // create a raw file reader for the datasets
        using (var readerX = RawLoaderFactory.CreateFileReader(datasetX.RawFile))
        {
            // wrap it in the cached object so we can load scan meta-data
            var cacheReaderX = new RawLoaderCache(readerX);
            var cacheDataX = ScanSummaryCache.ReadCache(cachex.Cache);
            readerX.AddDataFile(rawPathX, 0);
            cacheReaderX.AddCache(0, cacheDataX);

            for (var j = i + 1; j < data.Count; j++)
            {
                var cachey = data[j];

                // Get the raw path stored in the cache file...
                // then get the dataset object
                var rawPathY = ScanSummaryCache.ReadPath(cachey.Cache);
                var datasetY = new AlignmentDataset(rawPathY, "", cachey.Msgf);

                // create a raw file reader for the datasets
                using (var readerY = RawLoaderFactory.CreateFileReader(datasetY.RawFile))
                {
                    // Then the writer for creating a report
                    // NOTE(review): the figure type is Figure3 although this is the figure 4 routine - confirm.
                    var writer = AlignmentAnalysisWriterFactory.Create(AlignmentFigureType.Figure3, name + comparison);
                    comparison++;

                    try
                    {
                        // wrap it in the cached object so we can load scan meta-data
                        var cacheReaderY = new RawLoaderCache(readerY);
                        var cacheDataY = ScanSummaryCache.ReadCache(cachey.Cache);
                        cacheReaderY.AddCache(0, cacheDataY);
                        readerY.AddDataFile(rawPathY, 0);

                        var names = new List<string> { data[i].Cache, data[j].Cache };

                        // NOTE(review): the raw readers are passed here, not cacheReaderX/cacheReaderY,
                        // whereas GenerateFigure3_Matches passes the cache wrappers - confirm this is intended.
                        var analysis = MatchDatasets(comparerType, readerX, readerY, options, datasetX, datasetY, names);
                        AlignMatches(analysis, writer);
                    }
                    finally
                    {
                        // Close the report even when matching or alignment throws, so the writer is not leaked.
                        writer.Close();
                    }
                }
            }
        }
    }
}
/// <summary>
/// Reads the peptide identifications of two datasets, keeps those passing the score/FDR cutoff,
/// and counts the cross-dataset pairs with equal sequences whose normalized scan positions and
/// m/z values differ by less than the configured tolerances. The names and the count are printed
/// to the console.
/// </summary>
/// <param name="datasetX">Dataset whose <c>PeptideFile</c> is read for the first peptide list.</param>
/// <param name="datasetY">Dataset whose <c>PeptideFile</c> is read for the second peptide list.</param>
/// <param name="scanDataX">Scan summaries of the first dataset; used only to find the scan extrema.</param>
/// <param name="scanDataY">Scan summaries of the second dataset; used only to find the scan extrema.</param>
/// <param name="names">Labels printed, one per line, before the match count.</param>
/// <param name="options">Supplies <c>IdScore</c>, <c>Fdr</c>, <c>NetTolerance</c> and <c>MzTolerance</c>.</param>
/// <remarks>
/// <c>Aggregate</c> throws <see cref="InvalidOperationException"/> when either scan dictionary is empty.
/// If a dataset's minimum and maximum keys are equal the normalization divides by zero; the
/// NaN/Infinity that results never passes the tolerance test, so no pair is counted.
/// </remarks>
private void MatchPeptides(AlignmentDataset datasetX, AlignmentDataset datasetY, Dictionary<int, ScanSummary> scanDataX, Dictionary<int, ScanSummary> scanDataY, IEnumerable<string> names, SpectralOptions options)
{
    // Read data for peptides
    var reader = PeptideReaderFactory.CreateReader(SequenceFileType.MSGF);
    var peptidesA = reader.Read(datasetX.PeptideFile);
    var peptidesB = reader.Read(datasetY.PeptideFile);

    // Filter once; the extra ToList() before Where only created a throw-away copy.
    peptidesA = peptidesA.Where(x => PeptideUtility.PassesCutoff(x, options.IdScore, options.Fdr)).ToList();
    peptidesB = peptidesB.Where(x => PeptideUtility.PassesCutoff(x, options.IdScore, options.Fdr)).ToList();

    var peptideMapX = PeptideUtility.MapWithBestScan(peptidesA);
    var peptideMapY = PeptideUtility.MapWithBestScan(peptidesB);

    // Determine the scan extrema. The dictionary KEY of the entry with the extreme Value.Scan
    // is used, not the Scan value itself.
    var maxX = scanDataX.Aggregate((l, r) => l.Value.Scan > r.Value.Scan ? l : r).Key;
    var minX = scanDataX.Aggregate((l, r) => l.Value.Scan < r.Value.Scan ? l : r).Key;
    var maxY = scanDataY.Aggregate((l, r) => l.Value.Scan > r.Value.Scan ? l : r).Key;
    var minY = scanDataY.Aggregate((l, r) => l.Value.Scan < r.Value.Scan ? l : r).Key;

    // Loop-invariant denominators, hoisted out of the pairwise comparison.
    var spanX = Convert.ToDouble(maxX - minX);
    var spanY = Convert.ToDouble(maxY - minY);

    // Normalized position and peptide of every Y entry, computed once rather than once per X entry.
    var normalizedY = peptideMapY.Keys
        .Select(scany => new
        {
            Net = Convert.ToDouble(scany - minY) / spanY,
            Peptide = peptideMapY[scany]
        })
        .ToList();

    // Then map the peptide sequences to identify True Positive and False Positives
    // NOTE(review): the PassesCutoff re-check below looks redundant with the filtering above;
    // kept in case MapWithBestScan can return peptides outside the filtered lists.
    var count = (from scanx in peptideMapX.Keys
                 let peptideX = peptideMapX[scanx]
                 let netX = Convert.ToDouble(scanx - minX) / spanX
                 from entryY in normalizedY
                 let peptideY = entryY.Peptide
                 where Math.Abs(netX - entryY.Net) < options.NetTolerance
                 where Math.Abs(peptideX.Mz - peptideY.Mz) < options.MzTolerance
                 where PeptideUtility.PassesCutoff(peptideX, options.IdScore, options.Fdr) &&
                       PeptideUtility.PassesCutoff(peptideY, options.IdScore, options.Fdr) &&
                       peptideX.Sequence.Equals(peptideY.Sequence)
                 select peptideX).Count();

    Console.WriteLine();
    foreach (var name in names)
    {
        Console.WriteLine(name);
    }
    Console.WriteLine(@"Matches - {0}", count);
}