/// <summary>
/// Creates a STAC adapter for <c>UMCClusterLight</c> clusters, copies the STAC
/// settings from the supplied analysis options onto it, and assigns it as the
/// provider's peak matcher.
/// </summary>
/// <param name="options">Analysis options; every value is read from its <c>StacOptions</c>.</param>
public void BuildPeakMatcher(MultiAlignAnalysisOptions options)
{
    var userTolerances = new FeatureMatcherTolerances();
    var matcher = new STACAdapter<UMCClusterLight>();

    var source = options.StacOptions;
    var target = matcher.Options;

    // Matcher settings are written onto the adapter's existing options object.
    target.HistogramBinWidth = source.HistogramBinWidth;
    target.HistogramMultiplier = source.HistogramMultiplier;
    target.ShiftAmount = source.ShiftAmount;
    target.ShouldCalculateHistogramFDR = source.ShouldCalculateHistogramFDR;
    target.ShouldCalculateShiftFDR = source.ShouldCalculateShiftFDR;
    target.ShouldCalculateSLiC = source.ShouldCalculateSLiC;
    target.ShouldCalculateSTAC = source.ShouldCalculateSTAC;
    target.UseDriftTime = source.UseDriftTime;
    target.UseEllipsoid = source.UseEllipsoid;
    target.UsePriors = source.UsePriors;

    // The drift time tolerance is converted to single precision; the rest are copied as-is.
    userTolerances.DriftTimeTolerance = Convert.ToSingle(source.DriftTimeTolerance);
    userTolerances.MassTolerancePPM = source.MassTolerancePPM;
    userTolerances.NETTolerance = source.NETTolerance;
    userTolerances.Refined = source.Refined;
    target.UserTolerances = userTolerances;

    m_provider.PeakMatcher = matcher;
}
/// <summary>
/// Builds the STAC peak matcher from the analysis options and hands it to the provider.
/// </summary>
/// <param name="options">Analysis options whose <c>StacOptions</c> supply the matcher settings and tolerances.</param>
public void BuildPeakMatcher(MultiAlignAnalysisOptions options)
{
    var matchTolerances = new FeatureMatcherTolerances();
    var adapter = new STACAdapter<UMCClusterLight>();
    var stacOptions = options.StacOptions;

    // Histogram / shift settings
    adapter.Options.HistogramBinWidth = stacOptions.HistogramBinWidth;
    adapter.Options.HistogramMultiplier = stacOptions.HistogramMultiplier;
    adapter.Options.ShiftAmount = stacOptions.ShiftAmount;

    // Which scores and FDR estimates to calculate
    adapter.Options.ShouldCalculateHistogramFDR = stacOptions.ShouldCalculateHistogramFDR;
    adapter.Options.ShouldCalculateShiftFDR = stacOptions.ShouldCalculateShiftFDR;
    adapter.Options.ShouldCalculateSLiC = stacOptions.ShouldCalculateSLiC;
    adapter.Options.ShouldCalculateSTAC = stacOptions.ShouldCalculateSTAC;

    // Matching behaviour switches
    adapter.Options.UseDriftTime = stacOptions.UseDriftTime;
    adapter.Options.UseEllipsoid = stacOptions.UseEllipsoid;
    adapter.Options.UsePriors = stacOptions.UsePriors;

    // User tolerances (drift time is stored as a single-precision value)
    matchTolerances.DriftTimeTolerance = Convert.ToSingle(stacOptions.DriftTimeTolerance);
    matchTolerances.MassTolerancePPM = stacOptions.MassTolerancePPM;
    matchTolerances.NETTolerance = stacOptions.NETTolerance;
    matchTolerances.Refined = stacOptions.Refined;
    adapter.Options.UserTolerances = matchTolerances;

    m_provider.PeakMatcher = adapter;
}
/// <summary>
/// Skeleton of the grid application flow: reads the lines of a CSV file, creates one
/// (still empty) <c>UMCLight</c> per line, filters a cluster, loads a database, runs
/// STAC peak matching and writes the matches.
/// </summary>
/// <param name="csvPath">Path of the CSV file whose lines are read.</param>
public void TestLamarcheGridApp(string csvPath)
{
    // One UMCLight is created for every line of the csv file.
    var csvLines = File.ReadAllLines(csvPath);
    var features = new List<UMCLight>(csvLines.Length);
    for (var lineIndex = 0; lineIndex < csvLines.Length; lineIndex++)
    {
        var fields = csvLines[lineIndex].Split(',');

        // Placeholder: the parsed fields still need to be copied into the new UMCLight.
        features.Add(new UMCLight());
    }

    // Create clusters from the data read in from the file.
    // NOTE(review): cluster is still null here, so a null reference is what gets
    // filtered and possibly stored - the clustering step has not been written yet.
    UMCClusterLight cluster = null;
    var filteredClusters = new List<UMCClusterLight>();
    var isFilteredOut = Filter(cluster);
    if (!isFilteredOut)
    {
        // Keep the cluster
        filteredClusters.Add(cluster);
    }

    // Read a mtdb file using MACore or sqliteDb.
    // The path could instead be read from the console or taken from args[2].
    const string databasePath = @"C:\UnitTestFolder\MSGFPlus\blah.db";
    var database = ReadDatabase(databasePath);

    var matcher = new STACAdapter<UMCClusterLight>();
    var matchList = matcher.PerformPeakMatching(filteredClusters, database);

    // As with databasePath, the output path could be read from the console or taken
    // from args[3] when args.Length >= 4. When it is not supplied, WriteData falls
    // back to a default file defined inside the WriteData method.
    string writePath = null;
    WriteData(matchList, writePath);
}
/// <summary>
/// Performs matching operation on the selected datasets: runs STAC on all cached
/// clusters against the analysis mass tag database, replaces the persisted
/// cluster-to-mass-tag matches, and writes the results to "crosstab.tsv".
/// </summary>
/// <param name="datasets">The datasets to perform AMT matching on.</param>
/// <remarks>
/// A failure while writing the text file is logged and shown to the user but does not
/// abort the operation. Any other exception propagates to the caller; the total
/// progress indicator is hidden again in either case.
/// </remarks>
internal void PerformMatching(IEnumerable<DatasetInformationViewModel> datasets)
{
    // Materialize once. The sequence is walked three times below and every pass
    // changes DatasetState, so a lazily evaluated (e.g. state-filtered) sequence
    // could otherwise yield a different set of datasets on each pass.
    var datasetList = datasets.ToList();

    this.ShouldShowTotalProgress = true;
    try
    {
        var progressData = new ProgressData(new Progress<ProgressData>(pd =>
        {
            // This is the progress percent after stepping by progressData
            this.TotalProgress = pd.Percent;
        }))
        {
            IsPartialRange = true,
            MaxPercentage = 95
        };

        var progress = new Progress<ProgressData>(pd => progressData.Report(pd.Percent));

        // Get all possible charge states in data.
        var chargeStates = this.analysis.DataProviders.FeatureCache.RetrieveChargeStates().ToList();

        // Initialize STAC
        var stac = new STACAdapter<UMCClusterLight>
        {
            Options = new FeatureMatcherParameters
            {
                UseEllipsoid = this.ShouldUseEllipsoid,
                ShouldCalculateSLiC = this.ShouldCalculateSlic,
                HistogramBinWidth = this.HistogramBinWidth,
                HistogramMultiplier = this.HistogramMultiplier,
                UserTolerances = new FeatureMatcherTolerances
                {
                    MassTolerancePPM = this.DatabaseSelectionViewModel.MassTolerance,
                    NETTolerance = this.DatabaseSelectionViewModel.NetTolerance
                },
                ChargeStateList = chargeStates,
                UsePriors = this.UsePriors,
                ShouldCalculateSTAC = this.ShouldCalculateStac,
                ShouldCalculateHistogramFDR = this.CalculateHistogramFdr,
                ShouldCalculateShiftFDR = false,
            },
        };

        // Initialize datasets
        foreach (var dataset in datasetList)
        {
            dataset.DatasetState = DatasetInformationViewModel.DatasetStates.Matching;
        }

        // Get clusters and database
        var clusters = this.analysis.DataProviders.ClusterCache.FindAll();
        var clusterIdMap = clusters.ToDictionary(cluster => cluster.Id);
        var database = this.analysis.MassTagDatabase;

        // Run STAC
        var matches = stac.PerformPeakMatching(clusters, database);

        foreach (var dataset in datasetList)
        {
            dataset.DatasetState = DatasetInformationViewModel.DatasetStates.PersistingMatches;
        }

        // Persist matches: previous matches are cleared before the new ones are added.
        var clusterToMassTags = matches
            .Select(match => new ClusterToMassTagMap(match.Observed.Id, match.Target.Id))
            .ToList();
        this.analysis.DataProviders.MassTagMatches.ClearAllMatches();
        this.analysis.DataProviders.MassTagMatches.AddAllStateless(clusterToMassTags, progress);

        try
        {
            // Write to file
            this.WriteClusterData("crosstab.tsv", matches, clusterIdMap, progress);
        }
        catch (Exception ex)
        {
            var errMsg = "Error writing results to text file: " + ex.Message;
            Logger.PrintMessage(errMsg);

            // Todo: Add this: if (!GlobalSettings.AutomatedAnalysisMode)
            MessageBox.Show(errMsg);
        }
    }
    finally
    {
        // Hide the progress indicator even when matching or persistence throws.
        this.ShouldShowTotalProgress = false;
    }

    foreach (var dataset in datasetList)
    {
        dataset.DatasetState = DatasetInformationViewModel.DatasetStates.Matched;
    }
}
/// <summary>
/// Outline of the grid application: the CSV file is read line by line into placeholder
/// <c>UMCLight</c> objects, a cluster is run through <c>Filter</c>, the mass tag
/// database is read, STAC peak matching is performed and the matches are written out.
/// </summary>
/// <param name="csvPath">Location of the CSV file to read.</param>
public void TestLamarcheGridApp(string csvPath)
{
    // Read the csv file; each line becomes a new UMCLight.
    var rows = File.ReadAllLines(csvPath);
    var umcs = new List<UMCLight>();
    umcs.Capacity = rows.Length;
    foreach (var row in rows)
    {
        var columns = row.Split(',');

        // The columns are not yet transferred into the UMCLight; that mapping
        // remains to be filled in.
        var umc = new UMCLight();
        umcs.Add(umc);
    }

    // Clusters would be created from the file data at this point.
    // NOTE(review): no cluster is actually built yet, so null is passed to Filter and
    // may be added to the list - confirm this is only a stub.
    UMCClusterLight cluster = null;
    var keptClusters = new List<UMCClusterLight>();
    if (Filter(cluster) == false)
    {
        // Save the cluster
        keptClusters.Add(cluster);
    }

    // Read a mtdb file using MACore or sqliteDb. The path is hard-coded for now;
    // it could come from Console.ReadLine or from args[2] instead.
    var databasePath = @"C:\UnitTestFolder\MSGFPlus\blah.db";
    var massTagDatabase = ReadDatabase(databasePath);

    var stac = new STACAdapter<UMCClusterLight>();
    var matches = stac.PerformPeakMatching(keptClusters, massTagDatabase);

    // The output path could likewise come from the console or from args[3]
    // (when args.Length >= 4). Left unset, WriteData writes to a default file
    // defined inside the WriteData method.
    string writePath = null;
    WriteData(matches, writePath);
}
/// <summary>
/// The main entry point for the application. Reads the charge-specific cluster CSV
/// files from a directory, matches the clusters against a SQLite mass tag database
/// with STAC, and writes the de-duplicated matches to the named output.
/// </summary>
/// <param name="args">
/// Command-line arguments, all required: <c>crosstabDirectory databasePath outputName</c>.
/// </param>
/// <returns>0 when the analysis succeeds; 1 when arguments are missing or any step throws.</returns>
static int Main(string[] args)
{
    // NOTE(review): SetConsoleMode is documented as taking a console input/output
    // handle; this passes the process main window handle - confirm this is intended.
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);
    var clusterIdMap = new Dictionary<int, UMCClusterLight>();

    try
    {
        // args[0], args[1] and args[2] are all read below, so all three are required.
        if (args.Length < 3)
        {
            Console.WriteLine("MultiAlignSTACRunner crosstabDirectory databasePath outputName");
            Console.WriteLine("\tThe cross-tab file will be named similar to the database path");
            return 1;
        }

        // Setup the analysis processing
        Logger.PrintMessage("Find all datasets", true);

        // Strip stray line breaks that can come along with the arguments.
        var directoryPath = args[0].Replace("\r", "").Replace("\n", "");
        var databasePath = args[1].Replace("\r", "").Replace("\n", "");
        var name = args[2].Replace("\r", "").Replace("\n", "");
        var files = Directory.GetFiles(directoryPath, "*.csv");

        Logger.PrintMessage("Creating Log File");
        var loggerPath = AnalysisPathUtils.BuildLogPath(
            Path.GetDirectoryName(name),
            Path.GetFileNameWithoutExtension(name));
        Logger.LogPath = loggerPath;
        Logger.PrintMessage("Saving Log Data to: " + loggerPath);
        Logger.PrintMessage("Creating STAC", true);

        // Hardcode bad, but per discussions with OHSU
        var stac = new STACAdapter<UMCClusterLight>
        {
            Options = new FeatureMatcherParameters
            {
                UserTolerances =
                {
                    MassTolerancePPM = 25,
                    NETTolerance = .035,
                    DriftTimeTolerance = 3
                },
                UseDriftTime = true
            }
        };

        var clusterFilteringOptions = new FilteringOptions();

        WriteOptionsToLogFile(clusterFilteringOptions);
        WriteOptionsToLogFile(stac.Options);

        // Read the cluster data
        var allClusters = new List<UMCClusterLight>();
        var clusterId = 0;
        foreach (var file in files)
        {
            var filename = Path.GetFileName(file);
            Logger.PrintMessage(string.Format("Reading {0}", filename));

            const string chargeString = "charge";
            if (filename == null)
            {
                continue;
            }

            // Only files whose name contains "charge" hold cluster data.
            if (!filename.Contains(chargeString))
            {
                continue;
            }

            // Read each file.
            var start = DateTime.Now;

            // The charge state is the single character that follows "charge" once
            // underscores are removed. Ordinal search matches the ordinal Contains above.
            var xname = filename.Replace("_", "");
            var indexOfChargeString = xname.IndexOf(chargeString, StringComparison.Ordinal);
            var charge = Convert.ToInt32(xname.Substring(indexOfChargeString + chargeString.Length, 1));

            var clusters = ReadClusters(file, clusterFilteringOptions);
            var end = DateTime.Now;
            Logger.PrintMessage(string.Format("\tReading Took {0:.00} seconds", end.Subtract(start).TotalSeconds));

            // Give every cluster an id that is unique across all files.
            foreach (var cluster in clusters)
            {
                clusterIdMap.Add(clusterId, cluster);
                cluster.Id = clusterId++;
                cluster.ChargeState = charge;
            }

            allClusters.AddRange(clusters);
        }

        // Load the database
        Logger.PrintMessage(string.Format("Loading Mass Tag Database: {0}", Path.GetFileNameWithoutExtension(databasePath)));
        var options = new MassTagDatabaseOptions();
        var databaseDefinition = new InputDatabase
        {
            DatabaseFormat = MassTagDatabaseFormat.Sqlite,
            LocalPath = databasePath
        };
        var database = MtdbLoaderFactory.LoadMassTagDatabase(databaseDefinition, options);

        // Run stac
        try
        {
            Logger.PrintMessage("Matching clusters to peptides in mass tag database.");
            var matches = stac.PerformPeakMatching(allClusters, database);

            Logger.PrintMessage(string.Format("Writing Results To {0}", name));

            // cluster id -> (mass tag id -> match); only the first match seen for a
            // given cluster / mass tag pair is kept.
            var duplicateMatches = new Dictionary<int, Dictionary<int, FeatureMatchLight<UMCClusterLight, MassTagLight>>>();
            foreach (var match in matches)
            {
                Dictionary<int, FeatureMatchLight<UMCClusterLight, MassTagLight>> matchesForCluster;
                if (!duplicateMatches.TryGetValue(match.Observed.Id, out matchesForCluster))
                {
                    matchesForCluster = new Dictionary<int, FeatureMatchLight<UMCClusterLight, MassTagLight>>();
                    duplicateMatches.Add(match.Observed.Id, matchesForCluster);
                }

                if (!matchesForCluster.ContainsKey(match.Target.Id))
                {
                    matchesForCluster.Add(match.Target.Id, match);
                }
            }

            WriteClusterData(name, duplicateMatches, clusterIdMap);

            Logger.PrintMessage("ANALYSIS SUCCESS");
            return 0;
        }
        catch (Exception ex)
        {
            Logger.PrintMessage("Unhandled Error: " + ex.Message);
            var innerEx = ex.InnerException;
            while (innerEx != null)
            {
                Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                innerEx = innerEx.InnerException;
            }
            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return 1;
        }
    }
    catch (Exception ex)
    {
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
        var innerEx = ex.InnerException;
        while (innerEx != null)
        {
            Logger.PrintMessage("Inner Exception: " + innerEx.Message);
            innerEx = innerEx.InnerException;
        }
        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return 1;
    }
}
/// <summary>
/// Compares the identifications obtained by STAC AMT matching of MS1 clusters against
/// the MS/MS identifications attached to the same clusters, and writes the per-cluster
/// differences plus summary counts to the console.
/// </summary>
/// <param name="liquidResultsPath">
/// Liquid results file; loaded both as the mass tag database and as the source of the
/// scan-to-identification map.
/// </param>
/// <param name="isosFile">Isos file the MS features are read from.</param>
/// <param name="rawFile">Raw data file used to obtain the spectra provider.</param>
public void CompareMs2IdsToMs1Ids(string liquidResultsPath, string isosFile, string rawFile)
{
    // Read mass tags.
    var massTagReader = new LiquidResultsFileLoader(liquidResultsPath);
    var massTags = massTagReader.LoadDatabase();

    // Get identifications - this rereads the liquid results file, but I'm leaving it that way
    // for now because this is just a test.
    var scansToIds = this.GetIds(liquidResultsPath);

    // Read raw data file.
    var spectraProviderCache = new SpectraProviderCache();
    var spectraProvider = spectraProviderCache.GetSpectraProvider(rawFile);

    // Read isos features
    var isosReader = new MsFeatureLightFileReader();
    isosReader.IsosFilteroptions = new DeconToolsIsosFilterOptions { MaximumIsotopicFit = 0.15 };
    var msFeatures = isosReader.ReadFile(isosFile).ToList();

    // Get LCMS features. No alignment is run here, so the aligned values are simply
    // set to the observed ones.
    var msFeatureClusterer = new MsToLcmsFeatures(spectraProvider);
    var lcmsFeatures = msFeatureClusterer.Convert(msFeatures);
    lcmsFeatures.ForEach(feature =>
    {
        feature.NetAligned = feature.Net;
        feature.MassMonoisotopicAligned = feature.MassMonoisotopic;
    });

    // Create clusters - Since this is only working on a single dataset, there should be a 1:1 mapping
    // between LCMS features and clusters.
    var clusters = new List<UMCClusterLight>(lcmsFeatures.Count);
    foreach (var lcmsFeature in lcmsFeatures)
    {
        var cluster = new UMCClusterLight(lcmsFeature);
        cluster.CalculateStatistics(ClusterCentroidRepresentation.Median);
        clusters.Add(cluster);
    }

    // Do STAC AMT matching
    var stacAdapter = new STACAdapter<UMCClusterLight>
    {
        Options = new FeatureMatcherParameters
        {
            ShouldCalculateShiftFDR = false,
            UsePriors = true,
            UseEllipsoid = true,
            UseDriftTime = false,
            ShouldCalculateSTAC = true,
        }
    };
    var amtMatches = stacAdapter.PerformPeakMatching(clusters, massTags);

    // Group AMT matches by cluster, convert MassTags to Protein objects (represents lipid ID,
    // rather than Protein ID here) for simplicity in comparing them to the MS/MS IDs.
    var ms1Matches = clusters.ToDictionary(cluster => cluster, cluster => new List<Protein>());
    foreach (var amtMatch in amtMatches)
    {
        var cluster = amtMatch.Observed;
        var massTag = amtMatch.Target;
        ms1Matches[cluster].Add(new Protein
        {
            Name = massTag.ProteinName,
            Sequence = massTag.PeptideSequence,
            ChemicalFormula = massTag.PeptideSequence
        });
    }

    // Now we need to backtrack MS/MS identifications -> clusters
    var ms2Matches = new Dictionary<UMCClusterLight, List<Protein>>();
    foreach (var cluster in clusters)
    {
        var ms2IdsForCluster = new List<Protein>();
        ms2Matches.Add(cluster, ms2IdsForCluster);
        foreach (var lcmsFeature in cluster.UmcList)
        {
            foreach (var msFeature in lcmsFeature.MsFeatures)
            {
                foreach (var msmsFeature in msFeature.MSnSpectra)
                {
                    if (scansToIds.ContainsKey(msmsFeature.Scan))
                    {
                        ms2IdsForCluster.AddRange(scansToIds[msmsFeature.Scan]);
                    }
                }
            }
        }
    }

    // How many clusters have at least one ID from MS1 matching / from MS/MS?
    var clusterMs1IdCount = ms1Matches.Values.Count(value => value.Any());
    var clusterMs2IdCount = ms2Matches.Values.Count(value => value.Any());

    int overlapCount = 0; // Number of IDs that overlapped between MS1 and MS2 identifications.

    // Finally compare the MS1 IDs to the MS2 IDs.
    foreach (var cluster in clusters)
    {
        // For now only comparing by name. The name lists are materialized once because
        // they are searched and enumerated several times below.
        var ms1Lipids = ms1Matches[cluster].Select(id => id.Name).ToList();
        var ms2Lipids = ms2Matches[cluster].Select(id => id.Name).ToList();

        // Compare MS1 IDs for the cluster vs MS2 IDs for the cluster.
        var ms1OnlyIds = ms1Lipids.Where(lipid => !ms2Lipids.Contains(lipid)).ToList();
        var ms2OnlyIds = ms2Lipids.Where(lipid => !ms1Lipids.Contains(lipid)).ToList();

        // The overlap is the set of names found by both methods. Intersecting the two
        // "only" lists (which are disjoint by construction) would always give zero.
        overlapCount += ms1Lipids.Intersect(ms2Lipids).Count();

        // Write Results
        if (ms1OnlyIds.Count == 0 && ms2OnlyIds.Count == 0)
        {
            continue;
        }

        Console.WriteLine("Cluster {0}:", cluster.Id);
        if (ms1OnlyIds.Count > 0)
        {
            Console.WriteLine("\tMs1 Only IDs:");
            foreach (var id in ms1OnlyIds)
            {
                Console.WriteLine("\t\t{0}", id);
            }
        }

        if (ms2OnlyIds.Count > 0)
        {
            Console.WriteLine("\tMs2 Only IDs:");
            foreach (var id in ms2OnlyIds)
            {
                Console.WriteLine("\t\t{0}", id);
            }
        }
    }

    Console.WriteLine("Clusters with MS1 IDs: {0}", clusterMs1IdCount);
    Console.WriteLine("Clusters with MS2 IDs: {0}", clusterMs2IdCount);
    Console.WriteLine("Overlap: {0}", overlapCount);
}
/// <summary>
/// The main entry point for the application. Loads the cluster CSV files whose names
/// carry a charge state, runs STAC peak matching against a SQLite mass tag database,
/// and writes one match per cluster / mass tag pair to the named output.
/// </summary>
/// <param name="args">
/// Command-line arguments, all required: <c>crosstabDirectory databasePath outputName</c>.
/// </param>
/// <returns>0 when the analysis succeeds; 1 when arguments are missing or any step throws.</returns>
static int Main(string[] args)
{
    // NOTE(review): SetConsoleMode is documented as taking a console input/output
    // handle; this passes the process main window handle - confirm this is intended.
    var handle = System.Diagnostics.Process.GetCurrentProcess().MainWindowHandle;
    SetConsoleMode(handle, ENABLE_EXTENDED_FLAGS);
    var clusterIdMap = new Dictionary<int, UMCClusterLight>();

    try
    {
        // The directory, database path and output name are all indexed below,
        // so fewer than three arguments is a usage error.
        if (args.Length < 3)
        {
            Console.WriteLine("MultiAlignSTACRunner crosstabDirectory databasePath outputName");
            Console.WriteLine("\tThe cross-tab file will be named similar to the database path");
            return 1;
        }

        // Setup the analysis processing
        Logger.PrintMessage("Find all datasets", true);

        // Remove any carriage returns / line feeds embedded in the arguments.
        var directoryPath = args[0].Replace("\r", "").Replace("\n", "");
        var databasePath = args[1].Replace("\r", "").Replace("\n", "");
        var name = args[2].Replace("\r", "").Replace("\n", "");
        var files = Directory.GetFiles(directoryPath, "*.csv");

        Logger.PrintMessage("Creating Log File");
        var loggerPath = AnalysisPathUtils.BuildLogPath(
            Path.GetDirectoryName(name),
            Path.GetFileNameWithoutExtension(name));
        Logger.LogPath = loggerPath;
        Logger.PrintMessage("Saving Log Data to: " + loggerPath);
        Logger.PrintMessage("Creating STAC", true);

        // Hardcode bad, but per discussions with OHSU
        var stac = new STACAdapter<UMCClusterLight>
        {
            Options = new FeatureMatcherParameters
            {
                UserTolerances =
                {
                    MassTolerancePPM = 25,
                    NETTolerance = .035,
                    DriftTimeTolerance = 3
                },
                UseDriftTime = true
            }
        };

        var clusterFilteringOptions = new FilteringOptions();

        WriteOptionsToLogFile(clusterFilteringOptions);
        WriteOptionsToLogFile(stac.Options);

        // Read the cluster data
        var allClusters = new List<UMCClusterLight>();
        var clusterId = 0;
        foreach (var file in files)
        {
            var filename = Path.GetFileName(file);
            Logger.PrintMessage(string.Format("Reading {0}", filename));

            const string chargeString = "charge";
            if (filename == null)
            {
                continue;
            }

            // Files without "charge" in their name are skipped.
            if (!filename.Contains(chargeString))
            {
                continue;
            }

            // Read each file.
            var start = DateTime.Now;

            // With underscores removed, the character right after "charge" is parsed
            // as the charge state. The search is ordinal, like the Contains check above.
            var xname = filename.Replace("_", "");
            var indexOfChargeString = xname.IndexOf(chargeString, StringComparison.Ordinal);
            var charge = Convert.ToInt32(xname.Substring(indexOfChargeString + chargeString.Length, 1));

            var clusters = ReadClusters(file, clusterFilteringOptions);
            var end = DateTime.Now;
            Logger.PrintMessage(string.Format("\tReading Took {0:.00} seconds", end.Subtract(start).TotalSeconds));

            // Ids keep counting up across files so they stay unique.
            foreach (var cluster in clusters)
            {
                clusterIdMap.Add(clusterId, cluster);
                cluster.Id = clusterId++;
                cluster.ChargeState = charge;
            }

            allClusters.AddRange(clusters);
        }

        // Load the database
        Logger.PrintMessage(string.Format("Loading Mass Tag Database: {0}", Path.GetFileNameWithoutExtension(databasePath)));
        var options = new MassTagDatabaseOptions();
        var databaseDefinition = new InputDatabase
        {
            DatabaseFormat = MassTagDatabaseFormat.Sqlite,
            LocalPath = databasePath
        };
        var database = MtdbLoaderFactory.LoadMassTagDatabase(databaseDefinition, options);

        // Run stac
        try
        {
            Logger.PrintMessage("Matching clusters to peptides in mass tag database.");
            var matches = stac.PerformPeakMatching(allClusters, database);

            Logger.PrintMessage(string.Format("Writing Results To {0}", name));

            // Outer key: observed cluster id. Inner key: mass tag id. A repeated
            // cluster / mass tag pair keeps only the first match encountered.
            var duplicateMatches = new Dictionary<int, Dictionary<int, FeatureMatchLight<UMCClusterLight, MassTagLight>>>();
            foreach (var match in matches)
            {
                Dictionary<int, FeatureMatchLight<UMCClusterLight, MassTagLight>> byMassTag;
                if (!duplicateMatches.TryGetValue(match.Observed.Id, out byMassTag))
                {
                    byMassTag = new Dictionary<int, FeatureMatchLight<UMCClusterLight, MassTagLight>>();
                    duplicateMatches.Add(match.Observed.Id, byMassTag);
                }

                if (!byMassTag.ContainsKey(match.Target.Id))
                {
                    byMassTag.Add(match.Target.Id, match);
                }
            }

            WriteClusterData(name, duplicateMatches, clusterIdMap);

            Logger.PrintMessage("ANALYSIS SUCCESS");
            return 0;
        }
        catch (Exception ex)
        {
            Logger.PrintMessage("Unhandled Error: " + ex.Message);
            var innerEx = ex.InnerException;
            while (innerEx != null)
            {
                Logger.PrintMessage("Inner Exception: " + innerEx.Message);
                innerEx = innerEx.InnerException;
            }
            Logger.PrintMessage("Stack: " + ex.StackTrace);
            Logger.PrintMessage("");
            Logger.PrintMessage("ANALYSIS FAILED");
            return 1;
        }
    }
    catch (Exception ex)
    {
        Logger.PrintMessage("Unhandled Error: " + ex.Message, true);
        var innerEx = ex.InnerException;
        while (innerEx != null)
        {
            Logger.PrintMessage("Inner Exception: " + innerEx.Message);
            innerEx = innerEx.InnerException;
        }
        Logger.PrintMessage("Stack: " + ex.StackTrace, true);
        Logger.PrintMessage("");
        Logger.PrintMessage("ANALYSIS FAILED");
        return 1;
    }
}