/// <summary>
/// Looks up an LC-MS feature for every protein-spectrum match and writes the features
/// that were found to a tab-separated file.
/// </summary>
/// <param name="prsms">Matches whose mass, charge and scan number seed each feature lookup.</param>
/// <param name="run">Run used to build the peak matrix and to locate the neighboring scans.</param>
/// <param name="outTsvFilePath">Path of the TSV file to create.</param>
public void FeatureFind(List<ProteinSpectrumMatch> prsms, LcMsRun run, string outTsvFilePath)
{
    var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());

    // write result files
    // The using block closes (and flushes) the file even when a feature lookup throws.
    using (var tsvWriter = new StreamWriter(outTsvFilePath))
    {
        tsvWriter.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString(false));

        // Feature IDs are 1-based and only advance for matches that yield a feature.
        var featureId = 1;

        foreach (var match in prsms)
        {
            // Search between the scans returned by GetPrevScanNum / GetNextScanNum for the match's scan.
            var minScan = run.GetPrevScanNum(match.ScanNum, 1);
            var maxScan = run.GetNextScanNum(match.ScanNum, 1);

            var feature = featureFinder.GetLcMsPeakCluster(match.Mass, match.Charge, minScan, maxScan);
            if (feature == null)
            {
                continue;
            }

            tsvWriter.WriteLine("{0}\t{1}", featureId, LcMsFeatureFinderLauncher.GetString(feature, false));
            featureId++;
        }
    }
}
/// <summary>
/// Runs the LC-MS feature finder on a QC_ShewIntact .pbf file with CSV output enabled.
/// The test is ignored when the input file or the output folder is not reachable.
/// </summary>
public void TestGeneratingMs1FeatureFile()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string pbfFilePath = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2\QC_ShewIntact_1_19Jun15_Bane_14-09-01RZ.pbf";
    const string outputFolder = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Output";

    // The input file is checked first so the ignore message names the first missing resource.
    if (!File.Exists(pbfFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, pbfFilePath);
    }

    if (!Directory.Exists(outputFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", testName, outputFolder);
    }

    // Search window: masses 3000-5000, charges 2-60, at most 10 threads.
    var finderParameters = new LcMsFeatureFinderInputParameter
    {
        InputPath = pbfFilePath,
        OutputPath = outputFolder,
        MinSearchMass = 3000d,
        MaxSearchMass = 5000d,
        MinSearchCharge = 2,
        MaxSearchCharge = 60,
        CsvOutput = true,
        ScoreReport = false,
        MaxThreads = 10
    };

    var launcher = new LcMsFeatureFinderLauncher(finderParameters);
    launcher.Run();
}
/// <summary>
/// For each UTEX dataset, groups the MSAlign PrSMs into LC-MS features, merges co-eluting
/// features that share a protein ID, and writes the surviving features to a .ms1ft file.
/// </summary>
/// <remarks>
/// The test is ignored when the raw folder is missing, or when no dataset has both its
/// .pbf file and its MSAlign result table.
/// </remarks>
public void TestQuantifyIdedProteoforms()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string rawFolder = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
    const string promexOutFolder = @"D:\MassSpecFiles\UTEX\MSAlign";
    const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

    if (!Directory.Exists(rawFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
    }

    var nDataset = 32;
    var dataset = new string[nDataset];
    for (var i = 0; i < nDataset; i++)
    {
        dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
    }

    var prsmReader = new ProteinSpectrumMatchReader(0.01);
    var filesProcessed = 0;
    var tolerance = new Tolerance(10);

    for (var i = 0; i < dataset.Length; i++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
        if (!File.Exists(rawFile))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
            continue;
        }

        // Check the identification file before opening the run so a missing result table
        // does not cost a run load that is thrown away immediately.
        var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
        if (!File.Exists(path))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);
        filesProcessed++;

        // The protein ID is the 5 characters that follow ProteinNamePrefix in the protein name.
        for (var j = 0; j < prsmList.Count; j++)
        {
            var match = prsmList[j];
            match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
        }

        // PrSM To Feature (-1 means the PrSM has not been assigned to a feature yet)
        var prsmToFeatureIdMap = new int[prsmList.Count];
        for (var k = 0; k < prsmToFeatureIdMap.Length; k++)
        {
            prsmToFeatureIdMap[k] = -1;
        }

        // Feature To PrSM (featureToPrsm[n] holds the PrSMs of featureList[n])
        var featureToPrsm = new List<ProteinSpectrumMatchSet>();
        var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
        var featureList = new List<LcMsPeakCluster>();
        var featureId = 0;

        for (var j = 0; j < prsmList.Count; j++)
        {
            if (prsmToFeatureIdMap[j] >= 0)
            {
                // Already absorbed by the feature of an earlier PrSM.
                continue;
            }

            var match = prsmList[j];
            var minScanNum = match.ScanNum;
            var maxScanNum = match.ScanNum;
            var mass = match.Mass;
            var charge = match.Charge;
            var massTh = tolerance.GetToleranceAsMz(mass);
            var id1 = match.ProteinId;

            var feature = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
            var prsmSet = new ProteinSpectrumMatchSet(i) { match };
            prsmToFeatureIdMap[j] = featureId;

            if (feature == null)
            {
                // No regular cluster found: fall back to a feature built from noise peaks at this single charge.
                feature = featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, charge, charge);
            }
            else
            {
                // Elution-time slack: 0.5% of the elution time at the last scan, or 20% of the feature's length, whichever is larger.
                var etTol = Math.Max(run.GetElutionTime(run.MaxLcScan) * 0.005, feature.ElutionLength * 0.2);

                for (var k = j + 1; k < prsmList.Count; k++)
                {
                    var otherMatch = prsmList[k];
                    var id2 = otherMatch.ProteinId;
                    var et2 = run.GetElutionTime(otherMatch.ScanNum);

                    // Accept PrSMs of the same protein that elute within etTol on either side of the feature.
                    // The upper bound used to subtract etTol, which narrowed the window instead of widening it.
                    if (id1.Equals(id2) &&
                        feature.MinElutionTime - etTol < et2 &&
                        et2 < feature.MaxElutionTime + etTol &&
                        Math.Abs(otherMatch.Mass - mass) < massTh)
                    {
                        prsmToFeatureIdMap[k] = featureId;
                        prsmSet.Add(otherMatch);
                    }
                }
            }

            featureId++;

            // Flag = 1 marks the feature as live; merged-away features are reset to 0 below.
            feature.Flag = 1;
            featureList.Add(feature);
            featureToPrsm.Add(prsmSet);
        }

        // Overlap between features???
        for (var j = 0; j < featureList.Count; j++)
        {
            var f1 = featureList[j];
            if (f1.Flag < 1)
            {
                continue;
            }

            var prsm1 = featureToPrsm[j];

            for (var k = j + 1; k < featureList.Count; k++)
            {
                var f2 = featureList[k];
                if (f2.Flag < 1)
                {
                    continue;
                }

                var prsm2 = featureToPrsm[k];

                if (Math.Abs(f1.Mass - f2.Mass) > tolerance.GetToleranceAsMz(f1.Mass))
                {
                    continue;
                }

                if (!f1.CoElutedByNet(f2, 0.005))
                {
                    continue;
                }

                if (!prsm1.ShareProteinId(prsm2))
                {
                    continue;
                }

                // let us merge!! The feature with the larger ScanLength keeps all PrSMs.
                if (f1.ScanLength > f2.ScanLength)
                {
                    prsm1.AddRange(prsm2);
                    prsm2.Clear();
                    f2.Flag = 0;
                }
                else
                {
                    prsm2.AddRange(prsm1);
                    prsm1.Clear();
                    f1.Flag = 0;
                }
            }
        }

        // now output results!!
        var ms1ftFilePath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);

        // The using block closes the file even if expanding or formatting a feature throws.
        using (var writer = new StreamWriter(ms1ftFilePath))
        {
            writer.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

            for (var j = 0; j < featureList.Count; j++)
            {
                var f1 = featureList[j];
                if (f1.Flag < 1)
                {
                    continue;
                }

                // Widen the feature to the scans neighboring its first and last PrSM before reporting it.
                var prsm1 = featureToPrsm[j];
                var minScanNum = run.GetPrevScanNum(prsm1.MinScanNum, 1);
                var maxScanNum = run.GetNextScanNum(prsm1.MaxScanNum, 1);
                f1.ExpandScanRange(minScanNum, maxScanNum);

                writer.Write("{0}\t", j + 1);
                writer.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
            }
        }

        Console.WriteLine(ms1ftFilePath);
    }

    if (filesProcessed == 0)
    {
        Assert.Ignore("Skipped since data files not found");
    }
}
/// <summary>
/// Loads the ProMex features and MSAlign PrSMs of each UTEX dataset, tags features with the
/// PrSMs that fall inside them, aligns the features across datasets, and writes one
/// "_aligned.ms1ft" file per dataset.
/// </summary>
/// <remarks>
/// The test is ignored when the raw folder is missing, or when no dataset has all three of
/// its input files (.pbf, MSAlign result table, .ms1ft).
/// </remarks>
public void TestAlignFeatures()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string rawFolder = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
    const string promexOutFolder = @"D:\MassSpecFiles\UTEX\MSAlign";
    const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

    if (!Directory.Exists(rawFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
    }

    var nDataset = 32;
    var dataset = new string[nDataset];
    for (var i = 0; i < nDataset; i++)
    {
        dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
    }

    var tolerance = new Tolerance(10);
    var ftComparer = new UtexFeatureComparer(tolerance);
    var align = new LcMsFeatureAlignment(ftComparer);
    var prsmReader = new ProteinSpectrumMatchReader(0.01);
    var filesProcessed = 0;

    for (var i = 0; i < dataset.Length; i++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
        if (!File.Exists(rawFile))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
            continue;
        }

        // Verify every input file before opening the run, so a dataset that is going to be
        // skipped does not cost a run load.
        var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
        if (!File.Exists(path))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
            continue;
        }

        var ms1ftPath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
        if (!File.Exists(ms1ftPath))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", ms1ftPath);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        filesProcessed++;

        var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

        // The protein ID is the 5 characters that follow ProteinNamePrefix in the protein name.
        for (var j = 0; j < prsmList.Count; j++)
        {
            var match = prsmList[j];
            match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
        }

        var features = LcMsFeatureAlignment.LoadProMexResult(i, ms1ftPath, run);

        // tag features by PrSMs: a PrSM belongs to a feature when its scan lies strictly inside
        // the feature's scan range and its mass is within tolerance of the feature's mass
        for (var j = 0; j < features.Count; j++)
        {
            var massTol = tolerance.GetToleranceAsMz(features[j].Mass);
            foreach (var match in prsmList)
            {
                if (features[j].MinScanNum < match.ScanNum &&
                    match.ScanNum < features[j].MaxScanNum &&
                    Math.Abs(features[j].Mass - match.Mass) < massTol)
                {
                    features[j].ProteinSpectrumMatches.Add(match);
                }
            }
        }

        align.AddDataSet(i, features, run);
    }

    if (filesProcessed == 0)
    {
        Assert.Ignore("Skipped since input files not found");
    }

    align.AlignFeatures();
    Console.WriteLine("{0} alignments ", align.CountAlignedFeatures);
    align.RefineAbundance();

    var alignedFeatureList = align.GetAlignedFeatures();

    // NOTE(review): this loop covers all nDataset slots, including datasets skipped above;
    // confirm that GetAlignedFeatures() returns a slot for every dataset index.
    for (var i = 0; i < nDataset; i++)
    {
        var ms1ftPath = string.Format(@"{0}\{1}_aligned.ms1ft", promexOutFolder, dataset[i]);

        // The using block closes the file even if formatting a feature throws.
        using (var writer = new StreamWriter(ms1ftPath))
        {
            writer.Write(LcMsFeatureFinderLauncher.GetHeaderString());
            writer.WriteLine("\tIdedMs2ScanNums");

            for (var j = 0; j < alignedFeatureList.Count; j++)
            {
                writer.Write(j + 1);
                writer.Write("\t");

                var alignedFeature = alignedFeatureList[j][i];
                if (alignedFeature == null)
                {
                    // Placeholder row of 15 zero columns for a dataset with no aligned feature.
                    // Presumably sized to the columns produced by GetString - TODO confirm.
                    for (var k = 0; k < 14; k++)
                    {
                        writer.Write("0\t");
                    }

                    writer.Write("0\n");
                    continue;
                }

                writer.Write(LcMsFeatureFinderLauncher.GetString(alignedFeature));
                writer.Write("\t");

                // The last column lists the scan numbers of the identified PrSMs, ';'-separated; empty when there are none.
                if (alignedFeature.ProteinSpectrumMatches != null)
                {
                    var scanNums = string.Join(";", alignedFeature.ProteinSpectrumMatches.Select(prsm => prsm.ScanNum));
                    writer.Write(scanNums);
                }

                writer.Write("\n");
            }
        }
    }
}
/// <summary>
/// Writes the aligned features as a tab-separated cross-tab (one row per aligned feature, with
/// one abundance column and one score column per dataset) and, unless <paramref name="isTemp"/>
/// is set, also writes one ".aligned.ms1ft" file per dataset next to the cross-tab.
/// </summary>
/// <param name="align">Alignment whose aligned features are reported.</param>
/// <param name="outFilePath">Path of the cross-tab file to create.</param>
/// <param name="rawFiles">Raw file paths, indexed by dataset; their base names become column headers and output file names.</param>
/// <param name="isTemp">When true, only the cross-tab is written.</param>
private void OutputAlignmentResult(LcMsFeatureAlignment align, string outFilePath, IReadOnlyList<string> rawFiles, bool isTemp = true)
{
    var alignedFeatureList = align.GetAlignedFeatures();

    // The using block closes the file even if formatting a row throws.
    using (var writer = new StreamWriter(outFilePath))
    {
        // Header: three fixed columns, then abundance columns named after the datasets,
        // then score columns named after the dataset index.
        writer.Write("MonoMass\tMinElutionTime\tMaxElutionTime");

        for (var i = 0; i < align.CountDatasets; i++)
        {
            var dataSetName = Path.GetFileNameWithoutExtension(rawFiles[i]);
            writer.Write("\t{0}", dataSetName);
        }

        for (var i = 0; i < align.CountDatasets; i++)
        {
            writer.Write("\t{0}_Score", i);
        }

        writer.Write("\n");

        for (var i = 0; i < align.CountAlignedFeatures; i++)
        {
            var features = alignedFeatureList[i];
            var minMaxNet = GetMinMaxNet(features);

            // This must be a regular (non-verbatim) string: in a verbatim literal "\t" is a
            // backslash followed by 't', which would break the column layout set by the header.
            writer.Write("{0}\t{1:0.00000}\t{2:0.00000}", minMaxNet.Item1, minMaxNet.Item3, minMaxNet.Item4);

            // Datasets without a feature in this alignment are reported as 0.
            for (var j = 0; j < align.CountDatasets; j++)
            {
                var feature = features[j];
                writer.Write("\t");
                writer.Write(feature?.Abundance ?? 0d);
            }

            for (var j = 0; j < align.CountDatasets; j++)
            {
                var feature = features[j];
                writer.Write("\t");
                writer.Write(feature?.Score ?? 0d);
            }

            writer.Write("\n");
        }
    }

    if (isTemp)
    {
        return;
    }

    var outDirectory = Path.GetDirectoryName(Path.GetFullPath(outFilePath));

    for (var i = 0; i < align.CountDatasets; i++)
    {
        var dataSetName = Path.GetFileNameWithoutExtension(rawFiles[i]);

        // now output results!!
        var ms1ftFilePath = string.Format(@"{0}\{1}.aligned.ms1ft", outDirectory, dataSetName);

        using (var writer2 = new StreamWriter(ms1ftFilePath))
        {
            writer2.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

            for (var j = 0; j < align.CountAlignedFeatures; j++)
            {
                // NOTE(review): the cross-tab above treats a missing feature as null, but here f1 is
                // passed to GetString unchecked - confirm GetString tolerates null.
                var f1 = alignedFeatureList[j][i];
                writer2.Write("{0}\t", j + 1);
                writer2.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
            }
        }
    }
}
/// <summary>
/// Program entry point: parses and validates the command line, then either runs the feature
/// finder or creates a feature-map image from an existing features file.
/// </summary>
/// <param name="args">Command line arguments.</param>
/// <returns>
/// The launcher's error code; -1 when the arguments cannot be parsed or validated; -5 when
/// parsing throws; in non-DEBUG builds, a negative code derived from the exception message
/// when processing throws.
/// </returns>
static int Main(string[] args)
{
    LcMsFeatureFinderInputParameters parameters;

    try
    {
        var osVersionInfo = new clsOSVersionInfo();

        // Ordinal, case-insensitive search: ToLower() depends on the current culture.
        if (osVersionInfo.GetOSVersion().IndexOf("windows", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            var handle = Process.GetCurrentProcess().MainWindowHandle;
            SetConsoleMode(handle, EnableExtendedFlags);
        }

        var parser = new CommandLineParser<ProMexInputParameters>(Name, Version);
        parser.UsageExamples.Add("To create a PNG of the features in an existing ms1ft file " +
                                 "(requires both a .pbf file and a .ms1ft file):\n\tProMex.exe -i dataset.pbf -ms1ft dataset.ms1ft -featureMap");

        var results = parser.ParseArgs(args);
        if (!results.Success)
        {
            // Wait for 1.5 seconds
            System.Threading.Thread.Sleep(1500);
            return -1;
        }

        if (!results.ParsedResults.Validate())
        {
            parser.PrintHelp();

            // Wait for 1.5 seconds
            System.Threading.Thread.Sleep(1500);
            return -1;
        }

        parameters = results.ParsedResults;
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while parsing the command line parameters: " + ex.Message);
        return -5;
    }

    // In DEBUG builds exceptions are left unhandled; the try/catch exists only in other builds.
#if (!DEBUG)
    try
    {
#endif
        // Example text:  ProMex version 1.0.6527 (November 14, 2017)
        // (the build date is computed automatically)
        Console.WriteLine("************ {0} {1} ************", Name, Version);
        parameters.Display();

        var launcher = new LcMsFeatureFinderLauncher(parameters);
        int errorCode;

        // An existing features file means "draw the feature map" rather than "find features".
        if (string.IsNullOrWhiteSpace(parameters.ExistingFeaturesFilePath))
        {
            errorCode = launcher.Run();
        }
        else
        {
            errorCode = launcher.CreateFeatureMapImage(parameters.InputPath, parameters.ExistingFeaturesFilePath);
        }

        return errorCode;
#if (!DEBUG)
    }
    catch (Exception ex)
    {
        // NOTE: The DMS Analysis Manager looks for this text; do not change it
        Console.WriteLine("Exception while processing: " + ex.Message);
        Console.WriteLine(ex.StackTrace);

        // Derive a negative exit code from the message hash. Math.Abs(int.MinValue) throws
        // OverflowException, so positive hashes are negated by hand instead.
        var messageHash = ex.Message.GetHashCode();
        var errorCode = messageHash > 0 ? -messageHash : messageHash;
        if (errorCode == 0)
        {
            return -1;
        }

        return errorCode;
    }
#endif
}
/// <summary>
/// Program entry point: reads "key value" argument pairs into the parameter dictionary, then
/// either runs the feature finder or creates a feature-map image from an existing features file.
/// </summary>
/// <param name="args">Command line arguments, as alternating parameter names and values.</param>
/// <returns>
/// The launcher's error code; -1 when the arguments are missing, malformed or invalid; -5 when
/// parsing throws; in non-DEBUG builds, a negative code derived from the exception message
/// when processing throws.
/// </returns>
static int Main(string[] args)
{
    try
    {
        var handle = Process.GetCurrentProcess().MainWindowHandle;
        SetConsoleMode(handle, EnableExtendedFlags);

        if (args.Length == 0)
        {
            PrintUsageInfo();
            return -1;
        }

        if (args.Length % 2 != 0)
        {
            PrintUsageInfo("The number of arguments must be even.");
            return -1;
        }

        // initialize parameters with their defaults; the keys double as the set of accepted parameter names
        _paramDic = new Dictionary<string, string>
        {
            { LcMsFeatureFinderInputParameter.INPUT_FILE_PATH, null },
            { LcMsFeatureFinderInputParameter.OUTPUT_FOLDER_PATH, null },
            { LcMsFeatureFinderInputParameter.MINIMUM_CHARGE, "1" },
            { LcMsFeatureFinderInputParameter.MAXIMUM_CHARGE, "60" },
            { LcMsFeatureFinderInputParameter.MINIMUM_MASS, "2000.0" },
            { LcMsFeatureFinderInputParameter.MAXIMUM_MASS, "50000.0" },
            { LcMsFeatureFinderInputParameter.INCLUDE_ADDITIONAL_SCORES, "n" },
            { LcMsFeatureFinderInputParameter.SAVE_CSV, "n" },
            { LcMsFeatureFinderInputParameter.SAVE_PNG_FEATURE_MAP, "y" },
            { LcMsFeatureFinderInputParameter.LIKELIHOOD_SCORE_THRESHOLD, "-10" },
            { LcMsFeatureFinderInputParameter.MAXIMUM_THREADS, "0" },
            { LcMsFeatureFinderInputParameter.EXISTING_MS1FT_FILE, "" }
        };

        // Arguments come in (name, value) pairs; unknown names are rejected.
        for (var i = 0; i < args.Length / 2; i++)
        {
            var key = args[2 * i];
            var value = args[2 * i + 1];
            if (!_paramDic.ContainsKey(key))
            {
                PrintUsageInfo("Invalid parameter: " + key);
                return -1;
            }

            _paramDic[key] = value;
        }

        // Parse command line parameters
        var inputFilePath = _paramDic["-i"];
        if (inputFilePath == null)
        {
            PrintUsageInfo("Missing required parameter -i!");
            return -1;
        }

        // The input may be either a single file or a directory.
        if (!File.Exists(inputFilePath) && !Directory.Exists(inputFilePath))
        {
            PrintUsageInfo("File not found: " + inputFilePath);
            return -1;
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while parsing the command line parameters: " + ex.Message);
        return -5;
    }

    // In DEBUG builds exceptions are left unhandled; the try/catch exists only in other builds.
#if (!DEBUG)
    try
    {
#endif
        var param = new LcMsFeatureFinderInputParameter(_paramDic);
        Console.WriteLine("************ {0} {1} ************", Name, Version);
        param.Display();

        var launcher = new LcMsFeatureFinderLauncher(param);
        int errorCode;

        // An existing features file means "draw the feature map" rather than "find features".
        if (string.IsNullOrWhiteSpace(param.ExistingFeaturesFilePath))
        {
            errorCode = launcher.Run();
        }
        else
        {
            errorCode = launcher.CreateFeatureMapImage(param.InputPath, param.ExistingFeaturesFilePath);
        }

        return errorCode;
#if (!DEBUG)
    }
    catch (Exception ex)
    {
        // NOTE: The DMS Analysis Manager looks for this text; do not change it
        Console.WriteLine("Exception while processing: " + ex.Message);
        Console.WriteLine(ex.StackTrace);

        // Derive a negative exit code from the message hash. Math.Abs(int.MinValue) throws
        // OverflowException, so positive hashes are negated by hand instead.
        var messageHash = ex.Message.GetHashCode();
        var errorCode = messageHash > 0 ? -messageHash : messageHash;
        if (errorCode == 0)
        {
            return -1;
        }

        return errorCode;
    }
#endif
}