// Prints several path manipulations for a hard-coded raw file and makes sure the output directory exists.
// The test is ignored when the hard-coded raw file is not present on this machine.
public void TestPathUtils()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    Console.WriteLine(MassSpecDataReaderFactory.RemoveExtension(rawFilePath) + "_Target.tsv");
    Console.WriteLine(Path.GetDirectoryName(rawFilePath));
    Console.WriteLine(Path.Combine(Path.GetDirectoryName(rawFilePath), Path.GetFileNameWithoutExtension(rawFilePath) + "_IcTarget.tsv"));

    var outputDir = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\L1_1_Mode2\Synocho_L1_1_IcTarget.tsv";
    if (!Directory.Exists(outputDir))
    {
        // File.GetAttributes throws when nothing exists at the path, so only inspect the
        // attributes when a file is actually there; otherwise the directory is simply created.
        if (File.Exists(outputDir) && !File.GetAttributes(outputDir).HasFlag(FileAttributes.Directory))
        {
            throw new Exception(outputDir + " is not a directory!");
        }

        Directory.CreateDirectory(outputDir);
    }

    Console.WriteLine(outputDir);
}
/// <summary>
/// Process the given file or folder
/// </summary>
/// <returns>0 if success, otherwise an error code</returns>
/// <remarks>
/// Folder could either be a supported mass spec data folder, or a normal directory with several supported data files.
/// A path that does not exist is handed to ProcessFile, which reports the missing input and returns an error code.
/// </remarks>
public int Run()
{
    // Normalize the input path. Only affects paths to a file/folder in a folder-type dataset
    Parameters.InputPath = MassSpecDataReaderFactory.NormalizeDatasetPath(Parameters.InputPath);
    var inputPath = Parameters.InputPath;

    // Directory.Exists returns false for a missing path, whereas File.GetAttributes would throw;
    // using it keeps the "return an error code" contract when the input does not exist.
    var isPlainDirectory = Directory.Exists(inputPath) &&
                           !MassSpecDataReaderFactory.IsADirectoryDataset(inputPath);

    if (isPlainDirectory)
    {
        return ProcessDirectory(inputPath);
    }

    // Single file or folder-type dataset: only raw and pbf inputs are accepted
    if (!MsRawFile(inputPath) && !MsPbfFile(inputPath))
    {
        ShowErrorMessage(@"File extension not supported, " + inputPath);
        return -1;
    }

    return ProcessFile(inputPath);
}
/// <summary>
/// Validate the source path, scan range and output directory, creating the output directory if it does not exist
/// </summary>
/// <returns>True if validation succeeded; false (after calling PrintError) otherwise</returns>
public bool Validate()
{
    // Check for folder-type datasets, and replace SourcePath with the directory name if it is.
    SourcePath = MassSpecDataReaderFactory.GetDatasetName(SourcePath);
    var isDirectoryDataset = MassSpecDataReaderFactory.IsADirectoryDataset(SourcePath);

    // True if SourcePath is a directory that is NOT a supported folder-type dataset.
    var specPathIsDirectory = Directory.Exists(SourcePath) && !isDirectoryDataset;

    if (!File.Exists(SourcePath) && !specPathIsDirectory && !isDirectoryDataset)
    {
        PrintError("File not found: " + SourcePath);
        return false;
    }

    // Build a filtered copy instead of calling Remove on the list handed out by the factory,
    // so the factory's list is never modified by validation.
    var types = MassSpecDataReaderFactory.MassSpecDataTypeFilterList
        .Where(ext => !string.Equals(ext, ".pbf"))
        .ToList();

    // Any(predicate) is required here: Select(...).Any() only tests whether the list is non-empty,
    // which would accept every extension.
    if (!specPathIsDirectory &&
        !types.Any(ext => SourcePath.EndsWith(ext, System.StringComparison.OrdinalIgnoreCase)))
    {
        PrintError("Invalid file extension: (" + Path.GetExtension(SourcePath) + ") " + SourcePath);
        return false;
    }

    if (string.IsNullOrWhiteSpace(OutputDir))
    {
        // Must use "Path.GetFullPath" to return the absolute path when the source file is in the working directory
        // But, it could cause problems with too-long paths.
        OutputDir = specPathIsDirectory ? SourcePath : Path.GetDirectoryName(Path.GetFullPath(SourcePath));
    }

    if (string.IsNullOrWhiteSpace(OutputDir))
    {
        PrintError("Invalid output file directory: " + OutputDir);
        return false;
    }

    // A negative EndScan is not compared against StartScan
    if (EndScan < StartScan && EndScan > -1)
    {
        PrintError($"End scan ({EndScan}) must not be less than start scan ({StartScan})!");
        return false;
    }

    if (!Directory.Exists(OutputDir))
    {
        // Refuse to continue when a regular file already occupies the output directory path
        if (File.Exists(OutputDir) && !File.GetAttributes(OutputDir).HasFlag(FileAttributes.Directory))
        {
            PrintError("OutputDir \"" + OutputDir + "\" is not a directory!");
            return false;
        }

        Directory.CreateDirectory(OutputDir);
    }

    return true;
}
// Runs the tag-based search against a hard-coded .pbf file, FASTA database and modification file.
// The test is ignored as soon as one of those files is found to be missing.
public void TestTagBasedSearch()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    // Spectrum file to search
    const string rawFilePath = @"D:\MassSpecFiles\60k\Yufeng_SampleTest1_150614113438.pbf";
    var rawFileFound = File.Exists(rawFilePath);
    if (!rawFileFound)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawFilePath);
    }

    var lcmsRun = PbfLcMsRun.GetLcMsRun(rawFilePath);

    // Path of the sequence tag file that sits next to the spectrum file (currently not consumed further)
    var seqTagPath = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, ".seqtag");

    // Protein database
    const string fastaFilePath = @"D:\MassSpecFiles\60k\ID_003836_DA9CC1E4.fasta";
    var fastaFound = File.Exists(fastaFilePath);
    if (!fastaFound)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, fastaFilePath);
    }

    var database = new FastaDatabase(fastaFilePath);
    var searchTolerance = new Tolerance(10);

    // Modification definitions
    const string modsFilePath = @"D:\MassSpecFiles\60k\Mods.txt";
    var modsFound = File.Exists(modsFilePath);
    if (!modsFound)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, modsFilePath);
    }

    var aminoAcids = new AminoAcidSet(modsFilePath);

    TestTagBasedSearch(lcmsRun, database, searchTolerance, aminoAcids);
}
// Runs the tag-based search over every .pbf file found in the CompRef data folder.
// The test is ignored when the folder, the modification file or the FASTA file is missing.
public void TestTagBasedSearchCompRef()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dataSetPath = @"D:\MassSpecFiles\CompRef";
    const string fastaFilePath = @"D:\MassSpecFiles\CompRef\ID_003278_4B4B3CB1.fasta";
    const string modsFilePath = @"D:\MassSpecFiles\CompRef\Mods.txt";

    if (!Directory.Exists(dataSetPath))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, dataSetPath);
    }

    if (!File.Exists(modsFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    // Match on the actual ".pbf" extension with an ordinal comparison; a bare "pbf" suffix test
    // would also accept names such as "xyzpbf" whose rebuilt "<name>.pbf" path does not exist.
    var datasetNames = Directory.GetFiles(dataSetPath)
        .Where(fileName => fileName.EndsWith(".pbf", StringComparison.OrdinalIgnoreCase))
        .Select(fileName => Path.GetFileNameWithoutExtension(fileName))
        .ToList();
    datasetNames.Sort();

    var fastaDb = new FastaDatabase(fastaFilePath);
    var tolerance = new Tolerance(10);
    var aaSet = new AminoAcidSet(modsFilePath);

    foreach (var datasetName in datasetNames)
    {
        var rawFile = Path.Combine(dataSetPath, datasetName + ".pbf");
        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        Console.WriteLine("-----------------{0}--------------------", rawFile);
        TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
        Console.WriteLine("-----------------------------------------------------------------------");
    }
}
/// <summary>
/// Initialize this data set by reading the raw file asynchronously and initializing child view models
/// </summary>
/// <param name="filePath">The raw File Path.</param>
/// <returns>
/// The <see cref="Task"/>.
/// </returns>
/// <remarks>
/// IsLoading is reset to false even when loading fails, so the loading screen is hidden
/// before the exception reaches the caller.
/// </remarks>
public async Task InitializeAsync(string filePath)
{
    filePath = MassSpecDataReaderFactory.NormalizeDatasetPath(filePath);

    this.IsLoading = true; // Show animated loading screen
    try
    {
        // The extension is stripped twice (presumably to handle double extensions; confirm with the supported formats)
        this.Title = Path.GetFileNameWithoutExtension(Path.GetFileNameWithoutExtension(filePath));
        this.rawFilePath = filePath;

        this.LoadProgressPercent = 0.0;
        this.LoadProgressStatus = "Loading...";
        var progress = new Progress<ProgressData>(progressData =>
        {
            progressData.UpdateFrequencySeconds = 2;
            if (progressData.ShouldUpdate())
            {
                this.LoadProgressPercent = progressData.Percent;
                this.LoadProgressStatus = progressData.Status;
            }
        });

        // load raw file
        this.LcMs = await Task.Run(() => PbfLcMsRun.GetLcMsRun(filePath, 0, 0, progress));

        // Now that we have an LcMsRun, initialize viewmodels that require it
        this.XicViewModel = new XicViewModel(this.dialogService, this.LcMs);
        this.SpectrumViewModel = new SpectrumViewModel(this.dialogService, this.LcMs);
        this.FeatureMapViewModel = new FeatureViewerViewModel((LcMsRun)this.LcMs, this.dialogService);

        // When the selected scan changes in the xic plots, the selected scan for the prsm should update
        this.XicViewModel.SelectedScanUpdated().Subscribe(scan => this.SelectedPrSm.Scan = scan);

        // When an ID is selected on FeatureMap, update selectedPrSm
        this.FeatureMapViewModel.FeatureMapViewModel.WhenAnyValue(x => x.SelectedPrSm)
            .Where(prsm => prsm != null)
            .Subscribe(prsm => this.SelectedPrSm = prsm);

        // Create prsms for scan numbers (unidentified)
        await this.LoadScans();

        // For the selected PrSm, we should always use the LcMsRun for this dataset.
        this.SelectedPrSm.LcMs = this.LcMs;
    }
    finally
    {
        // Hide animated loading screen, also when loading threw
        this.IsLoading = false;
    }
}
// Looks up the sequence tags of one hard-coded scan in a FASTA database and prints every tag
// that matches at least one protein. Ignored when the tag file or the FASTA file is missing.
public void TestTagMatchingSingleSpec()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    const int scanNum = 4533;
    const int minTagLength = 8;
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

    // The sequence tag file shares the data set's base name
    var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");

    var tagFileFound = File.Exists(tagFileName);
    if (!tagFileFound)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, tagFileName);
    }

    var fastaFound = File.Exists(fastaFilePath);
    if (!fastaFound)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, fastaFilePath);
    }

    var database = new FastaDatabase(fastaFilePath);
    var indexedDatabase = new SearchableDatabase(database);
    var parser = new SequenceTagParser(tagFileName, minTagLength);

    foreach (var tag in parser.GetSequenceTags(scanNum))
    {
        var proteinNames = indexedDatabase
            .FindAllMatchedSequenceIndices(tag.Sequence)
            .Select(index => database.GetProteinName(index))
            .ToArray();

        // Tags without any protein match are not reported
        if (proteinNames.Length == 0)
        {
            continue;
        }

        var joinedNames = string.Join("\t", proteinNames);
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", tag.Sequence, tag.IsPrefix, tag.FlankingMass, joinedNames);
    }
}
// Runs the tag-based search against the hard-coded Lewy .pbf file, FASTA database and modification file.
// The test is ignored as soon as one of those files is found to be missing.
public void TestTagBasedSearchForLewy()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    // Spectrum file to search
    const string rawFilePath = @"D:\MassSpecFiles\Lewy\Lewy_AT_AD1_21May15_Bane_14-09-01RZ.pbf";
    var rawFileFound = File.Exists(rawFilePath);
    if (!rawFileFound)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawFilePath);
    }

    var lcmsRun = PbfLcMsRun.GetLcMsRun(rawFilePath);

    // Path of the sequence tag file that sits next to the spectrum file (currently not consumed further)
    var seqTagPath = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, ".seqtag");

    // Protein database
    const string fastaFilePath = @"D:\MassSpecFiles\Lewy\a4_human.fasta";
    var fastaFound = File.Exists(fastaFilePath);
    if (!fastaFound)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, fastaFilePath);
    }

    var database = new FastaDatabase(fastaFilePath);
    var searchTolerance = new Tolerance(10);

    // Modification definitions
    const string modsFilePath = @"D:\MassSpecFiles\Lewy\Mods.txt";
    var modsFound = File.Exists(modsFilePath);
    if (!modsFound)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, modsFilePath);
    }

    var aminoAcids = new AminoAcidSet(modsFilePath);

    TestTagBasedSearch(lcmsRun, database, searchTolerance, aminoAcids);
}
/// <summary>
/// Create a PNG image of previously found MS1 features
/// </summary>
/// <param name="pbfFilePath">.pbf file path</param>
/// <param name="ms1FeaturesFilePath">.ms1ft file path; an empty string or "." means "derive it from the .pbf file name"</param>
/// <returns>
/// 0 if success; -1 if the data file is missing, -2 if no output directory could be determined,
/// -3 if the MS1 features file is missing
/// </returns>
/// <remarks>
/// Mass range is determined using Parameters.MinSearchMass and Parameters.MaxSearchMass
/// </remarks>
public int CreateFeatureMapImage(string pbfFilePath, string ms1FeaturesFilePath)
{
    var dataFileFound = File.Exists(pbfFilePath);
    if (!dataFileFound)
    {
        Console.WriteLine(@"Error: Data file not found: " + pbfFilePath);
        return -1;
    }

    var targetDirectory = GetOutputDirectory(pbfFilePath);
    if (string.IsNullOrEmpty(targetDirectory))
    {
        return -2;
    }

    var datasetName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(pbfFilePath));

    // Fall back to <output directory>\<dataset name>.<extension> when no features file was named
    var useDefaultFeaturesFile = string.IsNullOrEmpty(ms1FeaturesFilePath) || ms1FeaturesFilePath == ".";
    if (useDefaultFeaturesFile)
    {
        var featuresFileName = datasetName + "." + FileExtension;
        ms1FeaturesFilePath = Path.Combine(targetDirectory, featuresFileName);
    }

    var imageFileName = datasetName + "_" + FileExtension + ".png";
    var pngFilePath = Path.Combine(targetDirectory, imageFileName);

    var featuresFileFound = File.Exists(ms1FeaturesFilePath);
    if (!featuresFileFound)
    {
        Console.WriteLine(@"Error: MS1 features file not found: " + ms1FeaturesFilePath);
        return -3;
    }

    Console.WriteLine(@"Start loading MS1 data from {0}", pbfFilePath);
    var lcmsRun = PbfLcMsRun.GetLcMsRun(pbfFilePath);

    CreateFeatureMapImage(lcmsRun, ms1FeaturesFilePath, pngFilePath);
    return 0;
}
/// <summary>
/// Find features in the data file
/// </summary>
/// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
/// <returns>
/// 0 if success; negative number on error: -1 no output directory, -2 output file already exists,
/// -3 input file not found, -4 data file has no MS1 spectra
/// </returns>
private int ProcessFile(string rawFile)
{
    var outDirectory = GetOutputDirectory(rawFile);
    if (string.IsNullOrEmpty(outDirectory))
    {
        return -1;
    }

    var baseName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
    var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
    var outCsvFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
    var pngFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

    if (File.Exists(ms1FeaturesFilePath))
    {
        Console.WriteLine(@"ProMex output already exists: {0}", ms1FeaturesFilePath);
        return -2;
    }

    if (!File.Exists(rawFile))
    {
        ShowErrorMessage(@"Cannot find input file: " + rawFile);
        return -3;
    }

    var stopwatch = Stopwatch.StartNew();
    Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
    var run = PbfLcMsRun.GetLcMsRun(rawFile);

    var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);
    Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    if (run.GetMs1ScanVector().Length == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
        return -4;
    }

    var comparer = featureFinder.Comparer;
    var container = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
    var minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
    var maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
    double totalMassBin = maxSearchMassBin - minSearchMassBin + 1;

    Console.WriteLine(@"Start MS1 feature extraction.");
    stopwatch.Restart();
    for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
    {
        var clusters = featureFinder.FindFeatures(binNum);
        container.Add(clusters);

        // Report progress every 1000 mass bins
        if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
        {
            var elapsed = (stopwatch.ElapsedMilliseconds) / 1000.0d;
            var processedBins = binNum - minSearchMassBin;
            var processedPercentage = ((double)processedBins / totalMassBin) * 100;
            Console.WriteLine(@"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed, container.NumberOfFeatures);
        }
    }

    Console.WriteLine(@"Complete MS1 feature extraction.");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);
    Console.WriteLine(@"Start selecting mutually independent features from feature network graph");
    stopwatch.Restart();

    // write result files
    // The using blocks guarantee that both writers are flushed and closed even when
    // feature filtering or row formatting throws part-way through.
    var featureId = 0;
    var csvWritten = false;
    using (var tsvWriter = new StreamWriter(ms1FeaturesFilePath))
    {
        tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));

        // csvWriter stays null (and the using block is a no-op for it) when CSV output is disabled
        using (var csvWriter = Parameters.CsvOutput ? new StreamWriter(outCsvFilePath) : null)
        {
            if (csvWriter != null)
            {
                csvWritten = true;
                csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
            }

            var filteredFeatures = container.GetFilteredFeatures(featureFinder);
            foreach (var feature in filteredFeatures)
            {
                featureId++;
                tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

                if (csvWriter == null)
                {
                    continue;
                }

                // Index of the top-ranked peak of the theoretical envelope
                var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];
                foreach (var envelope in feature.EnumerateEnvelopes())
                {
                    var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                    if (mostAbuPeak == null || !mostAbuPeak.Active)
                    {
                        continue;
                    }

                    var fitscore = 1.0 - feature.BestCorrelationScore;
                    csvWriter.WriteLine("{0},{1},{2},{3},{4},{5},{6}",
                        envelope.ScanNum, envelope.Charge, envelope.Abundance, mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId);
                }
            }
        }
    }

    Console.WriteLine(@"Complete feature filtration");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(@" - Number of filtered features = {0}", featureId);
    Console.WriteLine(@" - ProMex output: {0}", ms1FeaturesFilePath);

    if (csvWritten)
    {
        Console.WriteLine(@" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
    }

    if (Parameters.FeatureMapImage)
    {
        CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
    }

    return 0;
}
/// <summary>
/// Check whether a .pbf file exists at the given path with its extension changed to ".pbf"
/// </summary>
/// <param name="path">Path whose extension is swapped for ".pbf" before the existence check</param>
/// <returns>True if the resulting .pbf path refers to an existing file</returns>
private bool HasExistingPbfFile(string path)
{
    var pbfPath = MassSpecDataReaderFactory.ChangeExtension(path, ".pbf");
    var pbfFound = File.Exists(pbfPath);
    return pbfFound;
}
/// <summary>
/// Look for the default .pbf testing file in the default test file folder,
/// plus also in the UnitTest_Files directory in this project or solution
/// </summary>
/// <param name="createIfMissing">If true and the .mzML file is found, create the .pbf file</param>
/// <returns>Path to the file if found (or successfully created), otherwise the default path</returns>
/// <remarks>
/// A ".lock" file next to the .pbf file is used to coordinate with other processes
/// that may be creating the same file.
/// </remarks>
public static string GetPbfTestFilePath(bool createIfMissing)
{
    var callerName = MethodBase.GetCurrentMethod().Name;

    var defaultPbfPath = Path.Combine(DEFAULT_SPEC_FILES_FOLDER, PBF_TEST_FILE);
    var existingPbf = GetTestFile("PbfTestFilePath", defaultPbfPath, false);

    if (existingPbf != null)
    {
        // Check for a lock file, which would indicate another process is creating the .pbf file
        var pendingLock = new FileInfo(Path.Combine(existingPbf.FullName + ".lock"));
        WaitForLockFile(pendingLock, false);

        existingPbf.Refresh();
        if (existingPbf.Exists)
        {
            DeleteLockFile(pendingLock);
            return existingPbf.FullName;
        }
    }

    if (!createIfMissing)
    {
        // Not creating the file if missing; return the default path, despite the fact that the file does not exist
        // The calling method will discover that the file is missing and act accordingly
        return defaultPbfPath;
    }

    // Create the missing file from the .mzML test file
    var defaultMzmlPath = Path.Combine(DEFAULT_SPEC_FILES_FOLDER, MZML_TEST_FILE);
    var mzmlFileInfo = GetTestFile("MzmlTestFilePath", defaultMzmlPath, false);

    if (mzmlFileInfo?.DirectoryName == null)
    {
        // Unable to create the pbf file; return the default path, despite the fact that the file does not exist
        return defaultPbfPath;
    }

    ShowMessage(callerName, $"Creating {PBF_TEST_FILE} using {mzmlFileInfo.FullName}");

    var lockFileName = Path.GetFileNameWithoutExtension(mzmlFileInfo.Name) + ".pbf.lock";
    var creationLock = new FileInfo(Path.Combine(mzmlFileInfo.DirectoryName, lockFileName));
    var targetPbfPath = Path.Combine(mzmlFileInfo.DirectoryName, PBF_TEST_FILE);

    try
    {
        // Create a new lock file so other processes know this thread is creating the .pbf file
        WaitForLockFile(creationLock, true);

        mLastStatus = string.Empty;
        var creationStart = DateTime.UtcNow;

        var mzmlReader = MassSpecDataReaderFactory.GetMassSpecDataReader(mzmlFileInfo.FullName);
        var progressReporter = new Progress<ProgressData>(progressData =>
        {
            progressData.UpdateFrequencySeconds = 2;

            var atQuarterMark = progressData.Percent < 100 && (progressData.Percent % 25).Equals(0);
            if (!(atQuarterMark || progressData.ShouldUpdate()))
            {
                return;
            }

            // Skip the console update when the text to show has not changed
            var statusMessage = string.Format("{0}, {1:00.0}% complete ", progressData.Status, progressData.Percent);
            if (string.Equals(mLastStatus, statusMessage))
            {
                return;
            }

            mLastStatus = statusMessage;
            Console.Write("\r{0}, {1:00.0}% complete ", progressData.Status, progressData.Percent);
        });

        var pbfRun = new PbfLcMsRun(mzmlFileInfo.FullName, mzmlReader, targetPbfPath, 0, 0, progressReporter);
        Console.WriteLine();

        ShowMessage(callerName, $"Created {pbfRun.PbfFilePath} in {DateTime.UtcNow.Subtract(creationStart).TotalSeconds:F0} seconds");

        DeleteLockFile(creationLock);
        return pbfRun.PbfFilePath;
    }
    catch (Exception ex)
    {
        ShowMessage(callerName, $"Exception creating {PBF_TEST_FILE} using {mzmlFileInfo.FullName}: {ex.Message}");

        try
        {
            // Best-effort cleanup of a partially written .pbf file and of the lock file
            var partialPbf = new FileInfo(targetPbfPath);
            if (partialPbf.Exists)
            {
                partialPbf.Delete();
            }

            DeleteLockFile(creationLock);
        }
        catch
        {
            // Ignore errors here
        }

        // Return the default path, despite the fact that the file does not exist
        return defaultPbfPath;
    }
}
// Asserts that MassSpecDataReaderFactory reports Thermo raw file support as available
public void TestThermoRawAvailable()
{
    var thermoRawAvailable = MassSpecDataReaderFactory.IsThermoRawAvailable();

    Assert.AreEqual(true, thermoRawAvailable);
}
// Asserts that MassSpecDataReaderFactory reports pwiz support as available
public void TestPwizAvailable()
{
    var pwizAvailable = MassSpecDataReaderFactory.IsPwizAvailable();

    Assert.AreEqual(true, pwizAvailable);
}
/// <summary>
/// Program entry point: parse and validate the command line, then create a .pbf file for each input file
/// </summary>
/// <param name="args">Command line arguments</param>
/// <returns>
/// 0 on success; -1 if the arguments could not be parsed or validated; -5 if parsing threw;
/// in non-DEBUG builds, a negative code derived from the exception message if processing threw
/// </returns>
public static int Main(string[] args)
{
    PbfGenInputParameters options;

    try
    {
        var osVersionInfo = new clsOSVersionInfo();
        if (osVersionInfo.GetOSVersion().ToLower().Contains("windows"))
        {
            var handle = Process.GetCurrentProcess().MainWindowHandle;
            SetConsoleMode(handle, EnableExtendedFlags);
        }

        var exeName = System.Reflection.Assembly.GetEntryAssembly().GetName().Name;

        var parser = new CommandLineParser<PbfGenInputParameters>(exeName, Version);
        parser.UsageExamples.Add($"Using -start and -end to limit the scan range to include in the .pbf file\n\t{exeName}.exe -s Dataset.raw -start 2000 -end 3000");

        var results = parser.ParseArgs(args);
        if (!results.Success)
        {
            // Wait for 1.5 seconds
            System.Threading.Thread.Sleep(1500);
            return -1;
        }

        if (!results.ParsedResults.Validate())
        {
            parser.PrintHelp();

            // Wait for 1.5 seconds
            System.Threading.Thread.Sleep(1500);
            return -1;
        }

        options = results.ParsedResults;
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while parsing the command line parameters: " + ex.Message);
        return -5;
    }

    // In DEBUG builds the processing code runs without the try/catch so exceptions surface in the debugger
#if (!DEBUG)
    try
#endif
    {
        var specFilePaths = new[] { options.SourcePath };
        if (Directory.Exists(options.SourcePath) && !MassSpecDataReaderFactory.IsADirectoryDataset(options.SourcePath))
        {
            // TODO: Support folders with other formats in them too...
            specFilePaths = Directory.GetFiles(options.SourcePath, "*.raw");
        }

        foreach (var rawFilePath in specFilePaths)
        {
            var pbfFileName = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, PbfLcMsRun.FileExtensionConst);
            var pbfFilePath = Path.Combine(options.OutputDir, Path.GetFileName(pbfFileName));

            // Skip files that already have an up-to-date .pbf file
            if (File.Exists(pbfFilePath) && PbfLcMsRun.CheckFileFormatVersion(pbfFilePath, out var isCurrent) && isCurrent)
            {
                Console.WriteLine("{0} already exists.", pbfFilePath);
                continue;
            }

            Console.WriteLine("Creating {0} from {1}", pbfFilePath, rawFilePath);

            if (options.StartScan > 0 && options.EndScan > 0)
            {
                Console.WriteLine("Only including scans {0} to {1}", options.StartScan, options.EndScan);
            }
            else if (options.StartScan > 0)
            {
                Console.WriteLine("Only including scans {0} to the end", options.StartScan);
            }
            else if (options.EndScan > 0)
            {
                Console.WriteLine("Only including scans 1 to {0}", options.EndScan);
            }

            var reader = MassSpecDataReaderFactory.GetMassSpecDataReader(rawFilePath);
            var progress = new Progress<ProgressData>(p =>
            {
                p.UpdateFrequencySeconds = 2;
                if ((p.Percent % 25).Equals(0) || p.ShouldUpdate())
                {
                    Console.Write("\r{0}, {1:00.0}% complete ", p.Status, p.Percent);
                }
            });

            // The run object itself is not used further here.
            // NOTE(review): neither reader nor run is disposed here - confirm whether PbfLcMsRun takes ownership of the reader.
            var run = new PbfLcMsRun(rawFilePath, reader, pbfFilePath, 0, 0, progress, false, options.StartScan, options.EndScan);
            Console.WriteLine();
        }

        Console.WriteLine("PbfFormatVersion: {0}", PbfLcMsRun.FileFormatId);
        return 0;
    }
#if (!DEBUG)
    catch (Exception ex)
    {
        // NOTE: The DMS Analysis Manager looks for this text; do not change it
        Console.WriteLine("Exception while processing: " + ex.Message);
        Console.WriteLine(ex.StackTrace);

        // Map the message hash to a negative error code. Negating only positive values avoids
        // Math.Abs, which throws OverflowException for int.MinValue (already negative, so kept as is).
        var messageHash = ex.Message.GetHashCode();
        var errorCode = messageHash > 0 ? -messageHash : messageHash;

        // A zero hash must still produce a failure code
        return errorCode == 0 ? -1 : errorCode;
    }
#endif
}
// Matches MS1 features against MSPathFinder identifications and sequence tags for one hard-coded data set,
// then prints summary counts and the scan numbers of identifications that matched no feature.
// The test is ignored when any of the required input files is missing.
public void TestFeatureIdMatching()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var resultParser = new MsPathFinderParser(resultFilePath);
    const double qValueThreshold = 0.01;
    const double tolerancePpm = 13;

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    if (!File.Exists(rawFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFileName);

    // Identifications up to the first one exceeding the q-value threshold, sorted by mass for binary search
    var idList = resultParser.GetIdList().TakeWhile(id => id.QValue <= qValueThreshold).OrderBy(id => id.Mass).ToList();
    var idMassList = idList.Select(id => id.Mass).ToList();
    var idFlag = new bool[idList.Count];

    // Parse sequence tags
    var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
    const int minTagLength = 6;
    const int numProtMatches = 4;
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";

    if (!File.Exists(tagFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
    var featureParser = new TsvFileParser(featureFileName);
    var minScan = featureParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
    var maxScan = featureParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
    var minCharge = featureParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var maxCharge = featureParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var monoMass = featureParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();

    var numFeaturesWithId = 0;
    var numFeaturesWithMs2 = 0;
    var numFeaturesWithTags = 0;
    var numFeaturesWithMatchingTags = 0;
    var numFeaturesWithTwoOrMoreMatchingTags = 0;
    var numFeaturesWithNoIdAndMatchingTags = 0;

    // The tolerance object does not depend on the feature, so it is created once
    var massTolerance = new Tolerance(tolerancePpm);

    for (var i = 0; i < featureParser.NumData; i++)
    {
        var mass = monoMass[i];

        // Find Id
        var tolDa = massTolerance.GetToleranceAsDa(mass, 1);
        var minMass = mass - tolDa;
        var maxMass = mass + tolDa;

        var index = idMassList.BinarySearch(mass);
        if (index < 0)
        {
            // Bitwise complement of a negative result is the index of the next larger element
            index = ~index;
        }

        var matchedId = new List<MsPathFinderId>();

        // go down
        // Scan bounds are inclusive here, consistent with the "go up" pass and the MS2 scan loop below
        var curIndex = index - 1;
        while (curIndex >= 0)
        {
            var curId = idList[curIndex];
            if (curId.Mass < minMass)
            {
                break;
            }

            if (curId.Scan >= minScan[i] && curId.Scan <= maxScan[i] &&
                curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
            {
                matchedId.Add(curId);
                idFlag[curIndex] = true;
            }

            --curIndex;
        }

        // go up
        curIndex = index;
        while (curIndex < idList.Count)
        {
            var curId = idList[curIndex];
            if (curId.Mass > maxMass)
            {
                break;
            }

            if (curId.Scan >= minScan[i] && curId.Scan <= maxScan[i] &&
                curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
            {
                matchedId.Add(curId);
                idFlag[curIndex] = true;
            }

            ++curIndex;
        }

        var hasId = false;
        if (matchedId.Any())
        {
            ++numFeaturesWithId;
            hasId = true;
        }

        // Find MS2 scans
        var tags = new List<SequenceTag>();
        var hasMs2 = false;
        for (var scanNum = minScan[i]; scanNum <= maxScan[i]; scanNum++)
        {
            var isolationWindow = run.GetIsolationWindow(scanNum);
            if (isolationWindow == null)
            {
                continue;
            }

            // Estimate the charge from the isolation window target m/z and require it to be within the feature's charge range
            var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
            var charge = (int)Math.Round(mass / isolationWindowTargetMz);
            if (charge < minCharge[i] || charge > maxCharge[i])
            {
                continue;
            }

            var mz = Ion.GetIsotopeMz(mass, charge, Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex);
            if (isolationWindow.Contains(mz))
            {
                tags.AddRange(tagParser.GetSequenceTags(scanNum));
                hasMs2 = true;
            }
        }

        if (hasMs2)
        {
            ++numFeaturesWithMs2;
        }

        if (tags.Any())
        {
            ++numFeaturesWithTags;
        }

        // Count, per protein, how many of this feature's tags match it
        var protHist = new Dictionary<string, int>();
        var hasMatchedTag = false;
        foreach (var tag in tags)
        {
            var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence)
                .Select(idx => fastaDb.GetProteinName(idx))
                .ToArray();

            if (matchedProteins.Any())
            {
                hasMatchedTag = true;
                foreach (var protein in matchedProteins)
                {
                    int num;
                    if (protHist.TryGetValue(protein, out num))
                    {
                        protHist[protein] = num + 1;
                    }
                    else
                    {
                        protHist[protein] = 1;
                    }
                }
            }
        }

        if (hasMatchedTag)
        {
            ++numFeaturesWithMatchingTags;
            if (!hasId)
            {
                ++numFeaturesWithNoIdAndMatchingTags;
            }
        }

        if (protHist.Any())
        {
            // The counter is incremented when some protein is matched by at least numProtMatches tags
            var maxOcc = protHist.Values.Max();
            if (maxOcc >= numProtMatches)
            {
                ++numFeaturesWithTwoOrMoreMatchingTags;
            }
        }
    }

    Console.WriteLine("NumFeatures: {0}", featureParser.NumData);
    Console.WriteLine("NumId: {0}", idList.Count);
    Console.WriteLine("NumFeaturesWithId: {0} ({1})", numFeaturesWithId, numFeaturesWithId / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMs2: {0} ({1})", numFeaturesWithMs2, numFeaturesWithMs2 / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithTag: {0} ({1})", numFeaturesWithTags, numFeaturesWithTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMatchedTag: {0} ({1})", numFeaturesWithMatchingTags, numFeaturesWithMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMoreThanOneMatchedTag: {0} ({1})", numFeaturesWithTwoOrMoreMatchingTags, numFeaturesWithTwoOrMoreMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithNoIdAndMatchedTag: {0} ({1})", numFeaturesWithNoIdAndMatchingTags, numFeaturesWithNoIdAndMatchingTags / (float)featureParser.NumData);

    // List the scan numbers of identifications that were not matched to any feature
    for (var i = 0; i < idFlag.Length; i++)
    {
        if (!idFlag[i])
        {
            Console.WriteLine(idList[i].Scan);
        }
    }
}
/// <summary>
/// Process spectra and identifications.
/// </summary>
/// <remarks>
/// Spectra are processed in parallel. Once cancellation is requested, the remaining spectra are skipped
/// and whatever results were collected up to that point are returned.
/// </remarks>
/// <param name="rawFilePath">Full file path to raw file.</param>
/// <param name="idFilePath">Full file path to identification file.</param>
/// <param name="cancellationToken">For notification of cancellation.</param>
/// <param name="progress">Progress reporter.</param>
/// <returns>List of processed IDs, ordered by ascending SpecEValue.</returns>
/// <exception cref="ArgumentException">
/// The spectrum file named in the identification file does not match <paramref name="rawFilePath"/>.
/// </exception>
public List<ProcessedResult> Process(string rawFilePath, string idFilePath, CancellationToken cancellationToken, IProgress<ProgressData> progress = null)
{
    // Set up progress reporter
    progress = progress ?? new Progress<ProgressData>();
    var progressData = new ProgressData(progress);

    // Show initial loading message
    progressData.Report(0.1, "Loading...");

    // Read mzid file
    var mzidReader = new SimpleMZIdentMLReader();
    var identifications = mzidReader.Read(idFilePath, cancellationToken);

    // Check to make sure raw and MZID file match.
    var rawFileName = Path.GetFileNameWithoutExtension(rawFilePath);

    // A missing spectrum file name is treated as empty so that it is reported as a mismatch below
    var spectrumFileFromId = Path.GetFileNameWithoutExtension(identifications.SpectrumFile) ?? string.Empty;

    // Strip a trailing "_dta" suffix (and anything after it); ordinal search keeps this culture independent
    var dtaIndex = spectrumFileFromId.LastIndexOf("_dta", StringComparison.Ordinal);
    if (dtaIndex >= 0)
    {
        spectrumFileFromId = spectrumFileFromId.Substring(0, dtaIndex);
    }

    if (rawFileName != spectrumFileFromId)
    {
        throw new ArgumentException($"Mismatch between spectrum file ({rawFileName}) and id file ({spectrumFileFromId}).");
    }

    // Group IDs into a hash by scan number
    var idMap = identifications.Identifications.GroupBy(id => id.ScanNum).ToDictionary(scan => scan.Key, ids => ids);

    var processedResults = new ConcurrentBag<ProcessedResult>();

    // Load raw file
    using (var lcms = MassSpecDataReaderFactory.GetMassSpecDataReader(rawFilePath))
    {
        int count = 0;
        Parallel.ForEach(
            lcms.ReadAllSpectra(),
            spectrum =>
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    // Cancel if necessary
                    return;
                }

                // Take a single atomic snapshot of the counter; every spectrum sees a unique value,
                // so each reporting threshold is hit exactly once and the reported numbers are consistent.
                var processed = Interlocked.Increment(ref count) - 1;

                // Report completion percentage and current scan number
                if (processed % (int)Math.Max(0.01 * lcms.NumSpectra, 1) == 0)
                {
                    progressData.Report(processed, lcms.NumSpectra, $"{Math.Round(100.0 * processed / lcms.NumSpectra)}%");
                }

                // Skip spectrum if it isn't MS2 or has no identifications
                var productSpectrum = spectrum as ProductSpectrum;
                if (productSpectrum == null || !idMap.ContainsKey(spectrum.ScanNum))
                {
                    return;
                }

                var specResults = idMap[spectrum.ScanNum];

                var results = from specResult in specResults
                              let sequence = specResult.Peptide.GetIpSequence()
                              let coverage = this.CalculateSequenceCoverage(productSpectrum, sequence, specResult.Charge)
                              select new ProcessedResult
                              {
                                  ScanNum = spectrum.ScanNum,
                                  Sequence = sequence,
                                  Charge = specResult.Charge,
                                  PrecursorMz = specResult.CalMz,
                                  DeNovoScore = specResult.DeNovoScore,
                                  SpecEValue = specResult.SpecEv,
                                  EValue = specResult.EValue,
                                  QValue = specResult.QValue,
                                  PepQValue = specResult.PepQValue,
                                  FragMethod = productSpectrum.ActivationMethod,
                                  IsotopeError = specResult.IsoError,
                                  SequenceCoverage = Math.Round(coverage),
                              };

                foreach (var result in results)
                {
                    processedResults.Add(result);
                }
            });
    }

    // Sort spectra by SpecEValue
    return processedResults.OrderBy(pr => pr.SpecEValue).ToList();
}
/// <summary>
/// Validate the search parameters and fill in derived values.
/// </summary>
/// <remarks>
/// Besides checking, this method updates state: SpecFilePath is replaced by the dataset name,
/// SpecFilePaths is populated, OutputDir is defaulted and created when missing, the modifications
/// and scans files are loaded, and MaxNumThreads is adjusted via GetOptimalMaxThreads.
/// </remarks>
/// <returns>True if every parameter is valid; false after the first problem has been reported via PrintError</returns>
public bool Validate()
{
    // Spec file path validation
    if (string.IsNullOrWhiteSpace(SpecFilePath))
    {
        PrintError("Missing parameter for spectrum file path");
        return false;
    }

    // Check for folder-type datasets, and replace SpecFilePath with the directory name if it is.
    SpecFilePath = MassSpecDataReaderFactory.GetDatasetName(SpecFilePath);
    var isDirectoryDataset = MassSpecDataReaderFactory.IsADirectoryDataset(SpecFilePath);

    // True if SpecFilePath is a directory that is NOT a supported folder-type dataset.
    var specPathIsDirectory = Directory.Exists(SpecFilePath) && !isDirectoryDataset;

    if (!File.Exists(SpecFilePath) && !specPathIsDirectory && !isDirectoryDataset)
    {
        PrintError("File not found: " + SpecFilePath);
        return false;
    }

    // The path must end with one of the supported extensions.
    // Note: Any(predicate) is required here; Select(predicate).Any() is true for every non-empty list.
    var types = MassSpecDataReaderFactory.MassSpecDataTypeFilterList;
    if (!specPathIsDirectory && !types.Any(ext => SpecFilePath.EndsWith(ext, StringComparison.OrdinalIgnoreCase)))
    {
        PrintError("Invalid file extension for spectrum file: (" + Path.GetExtension(SpecFilePath) + ") " + SpecFilePath);
        return false;
    }

    // TODO: Handle non-.raw files in the subfolder
    SpecFilePaths = specPathIsDirectory ? Directory.GetFiles(SpecFilePath, "*.raw") : new[] { SpecFilePath };

    // Database path validation
    if (string.IsNullOrWhiteSpace(DatabaseFilePath))
    {
        PrintError("Missing parameter for database file path");
        return false;
    }

    if (!File.Exists(DatabaseFilePath))
    {
        PrintError("File not found: " + DatabaseFilePath);
        return false;
    }

    var dbExtension = Path.GetExtension(DatabaseFilePath).ToLowerInvariant();
    if (!dbExtension.Equals(".fa", StringComparison.Ordinal) && !dbExtension.Equals(".fasta", StringComparison.Ordinal))
    {
        PrintError("Invalid extension for the database file path (" + dbExtension + ")");
        return false;
    }

    // Output directory validation
    if (string.IsNullOrWhiteSpace(OutputDir))
    {
        // Must use "Path.GetFullPath" to return the absolute path when the source file is in the working directory
        // But, it could cause problems with too-long paths.
        OutputDir = specPathIsDirectory ? SpecFilePath : Path.GetDirectoryName(Path.GetFullPath(SpecFilePath));
    }

    if (string.IsNullOrWhiteSpace(OutputDir))
    {
        PrintError("Invalid output file directory: " + OutputDir);
        return false;
    }

    if (!Directory.Exists(OutputDir))
    {
        if (File.Exists(OutputDir) && !File.GetAttributes(OutputDir).HasFlag(FileAttributes.Directory))
        {
            PrintError("OutputDir \"" + OutputDir + "\" is not a directory!");
            return false;
        }

        Directory.CreateDirectory(OutputDir);
    }

    // Mods file validation
    if (!string.IsNullOrWhiteSpace(ModsFilePath) && !File.Exists(ModsFilePath))
    {
        PrintError("Modifications file not found: " + ModsFilePath);
        return false;
    }

    try
    {
        var errorMessage = LoadModsFile(ModsFilePath);
        if (!string.IsNullOrWhiteSpace(errorMessage))
        {
            PrintError(errorMessage);
            return false;
        }
    }
    catch (Exception ex)
    {
        PrintError("Exception parsing the file for parameter -mod: " + ex.Message);
        return false;
    }

    // Scans file validation
    if (!string.IsNullOrWhiteSpace(ScansFilePath) && !File.Exists(ScansFilePath))
    {
        PrintError("Scans File file not found: " + ScansFilePath);
        return false;
    }

    try
    {
        var errorMessage = LoadScansFile(ScansFilePath);
        if (!string.IsNullOrWhiteSpace(errorMessage))
        {
            PrintError(errorMessage);
            return false;
        }
    }
    catch (Exception ex)
    {
        PrintError("Exception parsing the file for parameter -scansFile: " + ex.Message);
        return false;
    }

    // Feature file validation
    if (!string.IsNullOrWhiteSpace(FeatureFilePath))
    {
        if (!File.Exists(FeatureFilePath))
        {
            PrintError("Feature File not found: " + FeatureFilePath);
            return false;
        }

        var featureExtension = Path.GetExtension(FeatureFilePath);
        var isSupportedFeatureFile =
            featureExtension.Equals(".csv", StringComparison.OrdinalIgnoreCase) ||
            featureExtension.Equals(".ms1ft", StringComparison.OrdinalIgnoreCase) ||
            featureExtension.Equals(".msalign", StringComparison.OrdinalIgnoreCase);

        if (!isSupportedFeatureFile)
        {
            PrintError("Invalid extension for the Feature file path (" + featureExtension + ")");
            return false;
        }
    }

    // MinX/MaxX validation
    if (MinSequenceLength > MaxSequenceLength)
    {
        PrintError("MinSequenceLength (" + MinSequenceLength + ") is larger than MaxSequenceLength (" + MaxSequenceLength + ")!");
        return false;
    }

    if (MinPrecursorIonCharge > MaxPrecursorIonCharge)
    {
        PrintError("MinPrecursorCharge (" + MinPrecursorIonCharge + ") is larger than MaxPrecursorCharge (" + MaxPrecursorIonCharge + ")!");
        return false;
    }

    if (MinProductIonCharge > MaxProductIonCharge)
    {
        PrintError("MinFragmentCharge (" + MinProductIonCharge + ") is larger than MaxFragmentCharge (" + MaxProductIonCharge + ")!");
        return false;
    }

    if (MinSequenceMass > MaxSequenceMass)
    {
        PrintError("MinSequenceMassInDa (" + MinSequenceMass + ") is larger than MaxSequenceMassInDa (" + MaxSequenceMass + ")!");
        return false;
    }

    MaxNumThreads = GetOptimalMaxThreads(MaxNumThreads);

    return true;
}
/// <summary>
/// Run the database search for the spectrum file in SpecFilePath: load the spectra, build the MS1 filter,
/// deconvolute the MS/MS spectra, optionally generate sequence tags, then search the target and/or decoy
/// database (depending on RunTargetDecoyAnalysis) and write the result files to OutputDir.
/// </summary>
/// <param name="corrThreshold">Not referenced anywhere in this method body</param>
/// <param name="cancellationToken">Optional token; passed to the parallel deconvolution step (CancellationToken.None when null)</param>
/// <param name="progress">Optional progress reporter; wrapped in a ProgressData that is stepped through the stages below</param>
/// <returns>
/// False when the FDR calculator reports an error (ErrorMessage is set in that case); otherwise true
/// </returns>
public bool RunSearch(double corrThreshold = 0.7, CancellationToken?cancellationToken = null, IProgress <ProgressData> progress = null)
{
    // Get the Normalized spec file/folder path
    SpecFilePath = MassSpecDataReaderFactory.NormalizeDatasetPath(SpecFilePath);

    // prog is handed to the sub-steps; when the caller supplied a reporter, it forwards
    // status and percentage into progData (which scales them into the current step range)
    var prog = new Progress <ProgressData>();
    var progData = new ProgressData(progress);
    if (progress != null)
    {
        prog = new Progress <ProgressData>(p => { progData.Status = p.Status; progData.StatusInternal = p.StatusInternal; progData.Report(p.Percent); });
    }

    // sw times the individual stages; swAll times the whole search
    var sw = new Stopwatch();
    var swAll = new Stopwatch();
    swAll.Start();
    ErrorMessage = string.Empty;

    Console.Write(@"Reading raw file...");
    progData.Status = "Reading spectra file";
    progData.StepRange(10.0);
    sw.Start();
    _run = PbfLcMsRun.GetLcMsRun(SpecFilePath, 0, 0, prog);

    // Cache the MS2 scan numbers and, indexed by scan number, the isolation window target m/z of each MS2 scan
    _ms2ScanNums = _run.GetScanNumbers(2).ToArray();
    _isolationWindowTargetMz = new double[_run.MaxLcScan + 1];
    foreach (var ms2Scan in _ms2ScanNums)
    {
        var ms2Spec = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
        if (ms2Spec == null)
        {
            continue;
        }
        _isolationWindowTargetMz[ms2Scan] = ms2Spec.IsolationWindow.IsolationWindowTargetMz;
    }
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    progData.StepRange(20.0);

    // Choose the MS1 filter: explicit scan numbers take precedence, then the default ProMex
    // .ms1ft file (generated on demand), then the user-supplied feature file
    ISequenceFilter ms1Filter;
    if (this.ScanNumbers != null && this.ScanNumbers.Any())
    {
        ms1Filter = new SelectedMsMsFilter(this.ScanNumbers);
    }
    else if (string.IsNullOrWhiteSpace(FeatureFilePath))
    {
        // Checks whether SpecFileName.ms1ft exists
        var ms1FtFilePath = MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, LcMsFeatureFinderLauncher.FileExtension);
        if (!File.Exists(ms1FtFilePath))
        {
            // No feature file yet: run ProMex with the search's mass and charge ranges
            Console.WriteLine(@"Running ProMex...");
            // NOTE(review): sw is started here without a Reset; it is reset right after this block, so no timing is reported for the ProMex run
            sw.Start();
            var param = new LcMsFeatureFinderInputParameter { InputPath = SpecFilePath, MinSearchMass = MinSequenceMass, MaxSearchMass = MaxSequenceMass, MinSearchCharge = MinPrecursorIonCharge, MaxSearchCharge = MaxPrecursorIonCharge, CsvOutput = false, ScoreReport = false, LikelihoodScoreThreshold = -10 };
            var featureFinder = new LcMsFeatureFinderLauncher(param);
            featureFinder.Run();
        }
        sw.Reset();
        sw.Start();
        Console.Write(@"Reading ProMex results...");
        ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, ms1FtFilePath, -10);
    }
    else
    {
        // A feature file was supplied; pick the filter implementation by its extension
        sw.Reset();
        sw.Start();
        var extension = Path.GetExtension(FeatureFilePath);
        if (extension.ToLower().Equals(".csv"))
        {
            Console.Write(@"Reading ICR2LS/Decon2LS results...");
            ms1Filter = new IsosFilter(_run, PrecursorIonTolerance, FeatureFilePath);
        }
        else if (extension.ToLower().Equals(".ms1ft"))
        {
            Console.Write(@"Reading ProMex results...");
            ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, FeatureFilePath, -10);
        }
        else if (extension.ToLower().Equals(".msalign"))
        {
            Console.Write(@"Reading MS-Align+ results...");
            ms1Filter = new MsDeconvFilter(_run, PrecursorIonTolerance, FeatureFilePath);
        }
        else
        {
            // Unrecognized extension: no MS1 filter is used
            ms1Filter = null; //new Ms1FeatureMatrix(_run);
        }
    }
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

    // pre-generate deconvoluted spectra for scoring
    _massBinComparer = new FilteredProteinMassBinning(AminoAcidSet, MaxSequenceMass + 1000);
    _ms2ScorerFactory2 = new CompositeScorerFactory(_run, _massBinComparer, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
    sw.Reset();
    Console.WriteLine(@"Generating deconvoluted spectra for MS/MS spectra...");
    sw.Start();
    var pfeOptions = new ParallelOptions { MaxDegreeOfParallelism = MaxNumThreads, CancellationToken = cancellationToken ?? CancellationToken.None };
    Parallel.ForEach(_ms2ScanNums, pfeOptions, ms2ScanNum => { _ms2ScorerFactory2.DeconvonluteProductSpectrum(ms2ScanNum); });
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    // NOTE(review): the previous call was StepRange(20.0); confirm that stepping to 10.0 here is intended
    progData.StepRange(10.0);
    progData.Status = "Reading Fasta File";

    // Target database
    var targetDb = new FastaDatabase(DatabaseFilePath);
    targetDb.Read();

    // Generate sequence tags for all MS/MS spectra
    if (TagBasedSearch)
    {
        progData.StepRange(25.0);
        progData.Status = "Generating Sequence Tags";
        sw.Reset();
        Console.WriteLine(@"Generating sequence tags for MS/MS spectra...");
        sw.Start();
        var seqTagGen = GetSequenceTagGenerator();
        _tagMs2ScanNum = seqTagGen.GetMs2ScanNumsContainingTags().ToArray();
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        // NOTE(review): ms1Filter can be null here (unrecognized feature file extension) — confirm LcMsPeakMatrix accepts that
        _tagSearchEngine = new ScanBasedTagSearchEngine(_run, seqTagGen, new LcMsPeakMatrix(_run, ms1Filter), targetDb, ProductIonTolerance, AminoAcidSet, _ms2ScorerFactory2, ScanBasedTagSearchEngine.DefaultMinMatchedTagLength, MaxSequenceMass, MinProductIonCharge, MaxProductIonCharge);
    }

    // Output files are named after the spectrum file (extension removed) plus a per-result-type ending
    var specFileName = MassSpecDataReaderFactory.RemoveExtension(Path.GetFileName(SpecFilePath));
    var targetOutputFilePath = Path.Combine(OutputDir, specFileName + TargetFileNameEnding);
    var decoyOutputFilePath = Path.Combine(OutputDir, specFileName + DecoyFileNameEnding);
    var tdaOutputFilePath = Path.Combine(OutputDir, specFileName + TdaFileNameEnding);

    progData.StepRange(60.0);
    progData.Status = "Running Target search";
    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
    {
        sw.Reset();
        Console.Write(@"Reading the target database...");
        sw.Start();
        // NOTE(review): targetDb.Read() was already called once when targetDb was created above
        targetDb.Read();
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // One match set slot per scan number
        var targetMatches = new SortedSet <DatabaseSequenceSpectrumMatch> [_run.MaxLcScan + 1];

        progData.MaxPercentage = 42.5;
        if (TagBasedSearch)
        {
            sw.Reset();
            Console.WriteLine(@"Tag-based searching the target database");
            sw.Start();
            RunTagBasedSearch(targetMatches, targetDb, null, prog);
            Console.WriteLine(@"Target database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        }

        progData.MaxPercentage = 60.0;
        sw.Reset();
        Console.WriteLine(@"Searching the target database");
        sw.Start();
        RunSearch(targetMatches, targetDb, ms1Filter, null, prog);
        Console.WriteLine(@"Target database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // calculate spectral e-value using generating function
        sw.Reset();
        Console.WriteLine(@"Calculating spectral E-values for target-spectrum matches");
        sw.Start();
        var bestTargetMatches = RunGeneratingFunction(targetMatches);
        WriteResultsToFile(bestTargetMatches, targetOutputFilePath, targetDb);
        sw.Stop();
        Console.WriteLine(@"Target-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    }

    progData.StepRange(95.0); // total to 95%
    progData.Status = "Running Decoy search";
    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
    {
        // Decoy database
        sw.Reset();
        sw.Start();
        var decoyDb = targetDb.Decoy(null, true);
        Console.Write(@"Reading the decoy database...");
        decoyDb.Read();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        progData.MaxPercentage = 77.5;
        // One match set slot per scan number
        var decoyMatches = new SortedSet <DatabaseSequenceSpectrumMatch> [_run.MaxLcScan + 1];
        if (TagBasedSearch)
        {
            sw.Reset();
            Console.WriteLine(@"Tag-based searching the decoy database");
            sw.Start();
            RunTagBasedSearch(decoyMatches, decoyDb, null, prog);
            Console.WriteLine(@"Decoy database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        }

        progData.MaxPercentage = 95.0;
        sw.Reset();
        Console.WriteLine(@"Searching the decoy database");
        sw.Start();
        RunSearch(decoyMatches, decoyDb, ms1Filter, null, prog);
        Console.WriteLine(@"Decoy database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // calculate spectral e-value using generating function
        sw.Reset();
        Console.WriteLine(@"Calculating spectral E-values for decoy-spectrum matches");
        sw.Start();
        var bestDecoyMatches = RunGeneratingFunction(decoyMatches);
        WriteResultsToFile(bestDecoyMatches, decoyOutputFilePath, decoyDb);
        sw.Stop();
        Console.WriteLine(@"Decoy-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    }

    progData.StepRange(100.0);
    progData.Status = "Writing combined results file";
    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
    {
        // Add "Qvalue" and "PepQValue"
        var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
        if (fdrCalculator.HasError())
        {
            ErrorMessage = fdrCalculator.ErrorMessage;
            Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
            return(false);
        }
        fdrCalculator.WriteTo(tdaOutputFilePath);
    }

    progData.Report(100.0);
    Console.WriteLine(@"Done.");
    swAll.Stop();
    Console.WriteLine(@"Total elapsed time for search: {0:f1} sec ({1:f2} min)", swAll.Elapsed.TotalSeconds, swAll.Elapsed.TotalMinutes);
    return(true);
}
/// <summary>
/// Write one tab-delimited parameter file per entry in SpecFilePaths.
/// </summary>
/// <remarks>
/// Each file is created in OutputDir and named after the spectrum file (extension replaced by
/// ParameterFileExtension). Only file names, not full paths, are recorded for the spectrum,
/// database and feature files.
/// </remarks>
public void Write()
{
    foreach (var specFilePath in SpecFilePaths)
    {
        var outputFilePath = Path.Combine(OutputDir, Path.GetFileNameWithoutExtension(specFilePath) + ParameterFileExtension);

        // An unspecified feature file (null, empty or whitespace) means the default .ms1ft file
        // next to the spectrum file is used, so record that name rather than a blank entry
        var featureFileName = string.IsNullOrWhiteSpace(FeatureFilePath)
            ? Path.GetFileName(MassSpecDataReaderFactory.ChangeExtension(specFilePath, ".ms1ft"))
            : Path.GetFileName(FeatureFilePath);

        using (var writer = new StreamWriter(outputFilePath))
        {
            writer.WriteLine("SpecFile\t" + Path.GetFileName(specFilePath));
            writer.WriteLine("DatabaseFile\t" + Path.GetFileName(DatabaseFilePath));
            writer.WriteLine("FeatureFile\t" + featureFileName);
            writer.WriteLine("SearchMode\t" + SearchModeInt);
            writer.WriteLine("Tag-based search\t" + TagBasedSearch);

            // TdaBool: null => decoy only, true => target and decoy, false => target only
            writer.WriteLine("Tda\t" + (TdaBool == null ? "Decoy" : (bool)TdaBool ? "Target+Decoy" : "Target"));

            writer.WriteLine("PrecursorIonTolerancePpm\t" + PrecursorIonTolerancePpm);
            writer.WriteLine("ProductIonTolerancePpm\t" + ProductIonTolerancePpm);
            writer.WriteLine("MinSequenceLength\t" + MinSequenceLength);
            writer.WriteLine("MaxSequenceLength\t" + MaxSequenceLength);
            writer.WriteLine("MinPrecursorIonCharge\t" + MinPrecursorIonCharge);
            writer.WriteLine("MaxPrecursorIonCharge\t" + MaxPrecursorIonCharge);
            writer.WriteLine("MinProductIonCharge\t" + MinProductIonCharge);
            writer.WriteLine("MaxProductIonCharge\t" + MaxProductIonCharge);
            writer.WriteLine("MinSequenceMass\t" + MinSequenceMass);
            writer.WriteLine("MaxSequenceMass\t" + MaxSequenceMass);
            writer.WriteLine("MaxDynamicModificationsPerSequence\t" + _maxNumDynModsPerSequence);

            foreach (var searchMod in _searchModifications)
            {
                writer.WriteLine("Modification\t" + searchMod);
            }
        }
    }
}
/// <summary>
/// Write the search parameters for SpecFilePath to a tab-delimited parameter file in OutputDir.
/// </summary>
/// <remarks>
/// The file is named after the spectrum file with its extension replaced by ParameterFileExtension.
/// Each line is a "name&lt;TAB&gt;value" pair; one "Modification" line is written per entry in Modifications.
/// </remarks>
public void Write()
{
    var parameterFileName = Path.GetFileNameWithoutExtension(SpecFilePath) + ParameterFileExtension;
    var parameterFilePath = Path.Combine(OutputDir, parameterFileName);

    using (var output = new StreamWriter(parameterFilePath))
    {
        // Emits a single "name<TAB>value" line
        Action<string, object> writeEntry = (name, value) => output.WriteLine(name + "\t" + value);

        writeEntry("SpecFile", Path.GetFileName(SpecFilePath));
        writeEntry("DatabaseFile", Path.GetFileName(DatabaseFilePath));

        // When no feature file was given, the default .ms1ft file for the spectrum file is recorded
        string featureFileName;
        if (string.IsNullOrWhiteSpace(FeatureFilePath))
        {
            featureFileName = Path.GetFileName(MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, ".ms1ft"));
        }
        else
        {
            featureFileName = Path.GetFileName(FeatureFilePath);
        }
        writeEntry("FeatureFile", featureFileName);

        writeEntry("InternalCleavageMode", InternalCleavageMode);
        writeEntry("Tag-based search", TagBasedSearch);

        // "Both" is written as "Target+Decoy"; every other mode uses its enum name
        var searchModeText = TargetDecoySearchMode == DatabaseSearchMode.Both
            ? "Target+Decoy"
            : TargetDecoySearchMode.ToString();
        writeEntry("Tda", searchModeText);

        writeEntry("PrecursorIonTolerancePpm", PrecursorIonTolerancePpm);
        writeEntry("ProductIonTolerancePpm", ProductIonTolerancePpm);
        writeEntry("MinSequenceLength", MinSequenceLength);
        writeEntry("MaxSequenceLength", MaxSequenceLength);
        writeEntry("MinPrecursorIonCharge", MinPrecursorIonCharge);
        writeEntry("MaxPrecursorIonCharge", MaxPrecursorIonCharge);
        writeEntry("MinProductIonCharge", MinProductIonCharge);
        writeEntry("MaxProductIonCharge", MaxProductIonCharge);
        writeEntry("MinSequenceMass", MinSequenceMass);
        writeEntry("MaxSequenceMass", MaxSequenceMass);
        writeEntry("MaxDynamicModificationsPerSequence", MaxDynamicModificationsPerSequence);

        foreach (var modification in Modifications)
        {
            writeEntry("Modification", modification);
        }
    }
}
/// <summary>
/// Checks that the spectra and a precursor XIC read from a .pbf file are identical to those obtained
/// by loading the matching .raw file into memory. The test is ignored when either file is missing.
/// </summary>
public void TestReadingPbfFile()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    var pbfPath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"TopDown\ProductionQCShew\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.pbf");
    if (!File.Exists(pbfPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, pbfPath);
    }

    var pbfReader = new PbfLcMsRun(pbfPath);
    var pbfChecksum = pbfReader.PbfFileChecksum;

    // The raw file is expected next to the pbf file, with the same base name
    var rawPath = Path.ChangeExtension(pbfPath, "raw");
    if (!File.Exists(rawPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawPath);
    }

    Console.WriteLine(@"Loading .pbf into memory");
    var rawReader = MassSpecDataReaderFactory.GetMassSpecDataReader(rawPath);
    var inMemoryRun = new InMemoryLcMsRun(rawReader, 0, 0);

    // Spectrum comparison: same peak count, and every peak equal in m/z and intensity
    Console.WriteLine(@"Comparing spectra between .pbf and in-memory spectra");
    foreach (var scan in inMemoryRun.AllScanNumbers)
    {
        var inMemorySpectrum = inMemoryRun.GetSpectrum(scan);
        var pbfSpectrum = pbfReader.GetSpectrum(scan);

        Assert.IsTrue(inMemorySpectrum.Peaks.Length == pbfSpectrum.Peaks.Length);

        var peakIndex = 0;
        while (peakIndex < inMemorySpectrum.Peaks.Length)
        {
            var expectedPeak = inMemorySpectrum.Peaks[peakIndex];
            var actualPeak = pbfSpectrum.Peaks[peakIndex];

            Assert.True(expectedPeak.Equals(actualPeak));
            Assert.True(Math.Abs(expectedPeak.Mz - actualPeak.Mz) < 1e-8);
            Assert.True(Math.Abs(expectedPeak.Intensity - actualPeak.Intensity) < 0.001);

            peakIndex++;
        }
    }

    // Chromatogram comparison for a single target m/z
    Console.WriteLine(@"Comparing XICs");
    const double targetMz = 655.01;
    var xicTolerance = new Tolerance(10);
    var inMemoryXic = inMemoryRun.GetFullPrecursorIonExtractedIonChromatogram(targetMz, xicTolerance);
    var pbfXic = pbfReader.GetFullPrecursorIonExtractedIonChromatogram(targetMz, xicTolerance);

    Assert.True(inMemoryXic.Count == pbfXic.Count);
    for (var pointIndex = 0; pointIndex < inMemoryXic.Count; pointIndex++)
    {
        // Print the mismatching pair before the assertion fails, to ease diagnosis
        if (!inMemoryXic[pointIndex].Equals(pbfXic[pointIndex]))
        {
            Console.WriteLine(@"{0} {1} {2}", pointIndex, inMemoryXic[pointIndex], pbfXic[pointIndex]);
        }

        Assert.True(inMemoryXic[pointIndex].Equals(pbfXic[pointIndex]));
    }

    Console.WriteLine(@"Done");
}
/// <summary>
/// Reports, for every MS2 spectrum of a dataset, how many spectra have sequence tags, how many have a tag
/// that matches the FASTA database, and how many of those have no identification at the q-value threshold.
/// The test is ignored when any of the required input files is missing.
/// </summary>
public void TestTagMatching()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    const int minTagLength = 8;

    // Sequence tags
    var tagFilePath = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
    if (!File.Exists(tagFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, tagFilePath);
    }
    var tagParser = new SequenceTagParser(tagFilePath, minTagLength);

    // Raw spectra
    var rawFilePath = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawFilePath);
    }
    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

    // Identifications
    const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, resultFilePath);
    }
    var resultParser = new MsPathFinderParser(resultFilePath);
    const double qValueThreshold = 0.01;
    var idList = resultParser.GetIdWithQValuesNoLargerThan(qValueThreshold);

    // scanHasId[scan] is true when the scan has an identification passing the threshold
    var scanHasId = new bool[run.MaxLcScan + 1];
    foreach (var identification in idList)
    {
        scanHasId[identification.Scan] = true;
    }

    // Protein database
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, fastaFilePath);
    }
    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);

    var totalMs2 = 0;
    var withTags = 0;
    var withMatchingTag = 0;
    var withMatchingTagButNoId = 0;

    foreach (var scan in run.GetScanNumbers(2))
    {
        totalMs2++;

        var tags = tagParser.GetSequenceTags(scan);
        if (tags == null)
        {
            continue;
        }

        withTags++;

        // A spectrum counts once, as soon as any of its tags is found in the database
        if (!tags.Any(tag => searchableDb.Search(tag.Sequence) >= 0))
        {
            continue;
        }

        withMatchingTag++;
        if (!scanHasId[scan])
        {
            withMatchingTagButNoId++;
        }
    }

    Console.WriteLine("Tag length: {0}", minTagLength);
    Console.WriteLine("NumMs2Spectra: {0}", totalMs2);
    Console.WriteLine("NumMs2SpectraWithTags: {0} ({1})", withTags, withTags / (float)totalMs2);
    Console.WriteLine("NumMs2SpectraWithMatchingTags: {0} ({1})", withMatchingTag, withMatchingTag / (float)totalMs2);
    Console.WriteLine("NumMs2SpectraWithMatchingTagsWithNoId: {0} ({1})", withMatchingTagButNoId, withMatchingTagButNoId / (float)totalMs2);
}
/// <summary>
/// Entry point: create a .pbf file for a spectrum file, or for every .raw file in a directory.
/// </summary>
/// <param name="args">
/// Command line arguments given as "key value" pairs: -s (required, spectrum file or directory)
/// and -o (optional, output directory; defaults to the directory of the spectrum file).
/// </param>
/// <returns>
/// 0 on success; -1 for invalid arguments; -5 when an exception occurs while parsing the arguments;
/// in non-DEBUG builds, a negative code derived from the exception message when processing fails.
/// </returns>
public static int Main(string[] args)
{
    string specFilePath;
    string outputDir;

    try
    {
        var handle = Process.GetCurrentProcess().MainWindowHandle;
        SetConsoleMode(handle, EnableExtendedFlags);

        if (args.Length == 0)
        {
            PrintUsageInfo();
            return -1;
        }

        // Arguments are "key value" pairs; a dangling key would otherwise be silently ignored
        if (args.Length % 2 != 0)
        {
            PrintUsageInfo("Missing value for parameter: " + args[args.Length - 1]);
            return -1;
        }

        var paramDic = new Dictionary<string, string>
        {
            { "-s", null },
            { "-o", null }
        };

        for (var i = 0; i < args.Length / 2; i++)
        {
            var key = args[2 * i];
            var value = args[2 * i + 1];
            if (!paramDic.ContainsKey(key))
            {
                PrintUsageInfo("Invalid parameter: " + key);
                return -1;
            }

            paramDic[key] = value;
        }

        // Parse command line parameters
        specFilePath = paramDic["-s"];

        if (specFilePath == null)
        {
            PrintUsageInfo("Missing required parameter -s!");
            return -1;
        }

        // Check for folder-type datasets, and replace specFilePath with the directory name if it is.
        specFilePath = MassSpecDataReaderFactory.GetDatasetName(specFilePath);

        var isDirectoryDataset = MassSpecDataReaderFactory.IsADirectoryDataset(specFilePath);

        // True if specFilePath is a directory that is NOT a supported folder-type dataset.
        var specPathIsDirectory = Directory.Exists(specFilePath) && !isDirectoryDataset;

        if (!File.Exists(specFilePath) && !specPathIsDirectory && !isDirectoryDataset)
        {
            PrintUsageInfo("File not found: " + specFilePath);
            return -1;
        }

        // .pbf is the output format, so it is not accepted as an input type here
        var types = MassSpecDataReaderFactory.MassSpecDataTypeFilterList;
        types.Remove(".pbf");

        // Note: Any(predicate) is required here; Select(predicate).Any() is true for every non-empty list
        if (!specPathIsDirectory && !types.Any(ext => specFilePath.EndsWith(ext, StringComparison.OrdinalIgnoreCase)))
        {
            PrintUsageInfo("Invalid file extension: (" + Path.GetExtension(specFilePath) + ") " + specFilePath);
            return -1;
        }

        // Must use "Path.GetFullPath" to return the absolute path when the source file is in the working directory
        // But, it could cause problems with too-long paths.
        outputDir = paramDic["-o"] ?? (specPathIsDirectory ? specFilePath : Path.GetDirectoryName(Path.GetFullPath(specFilePath)));
        if (outputDir == null)
        {
            PrintUsageInfo("Invalid output file directory: " + specFilePath);
            return -1;
        }

        if (!Directory.Exists(outputDir))
        {
            if (File.Exists(outputDir) && !File.GetAttributes(outputDir).HasFlag(FileAttributes.Directory))
            {
                PrintUsageInfo("OutputDir " + outputDir + " is not a directory!");
                return -1;
            }

            Directory.CreateDirectory(outputDir);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while parsing the command line parameters: " + ex.Message);
        return -5;
    }

#if (!DEBUG)
    try
    {
#endif
        // A plain directory means "convert every .raw file in it"; anything else is a single dataset
        string[] specFilePaths = new[] { specFilePath };
        if (Directory.Exists(specFilePath) && !MassSpecDataReaderFactory.IsADirectoryDataset(specFilePath))
        {
            specFilePaths = Directory.GetFiles(specFilePath, "*.raw");
        }

        foreach (var rawFilePath in specFilePaths)
        {
            var pbfFilePath = Path.Combine(outputDir, Path.GetFileNameWithoutExtension(rawFilePath) + PbfLcMsRun.FileExtension);

            // Skip files that already have an up-to-date .pbf file
            bool isCurrent;
            if (File.Exists(pbfFilePath) && PbfLcMsRun.CheckFileFormatVersion(pbfFilePath, out isCurrent) && isCurrent)
            {
                Console.WriteLine("{0} already exists.", pbfFilePath);
                continue;
            }

            Console.WriteLine("Creating {0} from {1}", pbfFilePath, rawFilePath);
            IMassSpecDataReader reader = MassSpecDataReaderFactory.GetMassSpecDataReader(rawFilePath);
            var progress = new Progress<ProgressData>(p =>
            {
                p.UpdateFrequencySeconds = 2;
                if ((p.Percent % 25).Equals(0) || p.ShouldUpdate())
                {
                    Console.Write("\r{0}, {1:00.0}% complete                        ", p.Status, p.Percent);
                }
            });

            // Constructing the run with an output path is what creates the .pbf file
            var run = new PbfLcMsRun(rawFilePath, reader, pbfFilePath, 0, 0, progress);
            Console.WriteLine();
        }

        Console.WriteLine("PbfFormatVersion: {0}", PbfLcMsRun.FileFormatId);
        return 0;
#if (!DEBUG)
    }
    catch (Exception ex)
    {
        // NOTE: The DMS Analysis Manager looks for this text; do not change it
        Console.WriteLine("Exception while processing: " + ex.Message);
        Console.WriteLine(ex.StackTrace);

        // Derive a negative, non-zero exit code from the message.
        // Math.Abs is avoided because it throws OverflowException for int.MinValue.
        var messageHash = ex.Message.GetHashCode();
        if (messageHash == 0)
        {
            return -1;
        }

        return messageHash > 0 ? -messageHash : messageHash;
    }
#endif
}
/// <summary>
/// For one MS1 feature (mass and scan range), collects the sequence tags of its matching MS2 scans,
/// maps them onto database proteins, and prints the matched tags of the best-supported protein
/// before and after merging. The test is ignored when any required input file is missing.
/// </summary>
public void TestFeatureId()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Base path of the dataset (no extension); the input files below are derived from it
    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    const string tagFileName = dataSet + ".seqtag";
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

    var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");

    // Skip unless every file that is actually opened below exists
    foreach (var requiredFile in new[] { rawFileName, featureFileName, tagFileName, fastaFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    // Feature: 5236-5286 6-12 8480.3681 5
    // Alternative features used previously: scans 7251-7326, mass 32347.18; scans 4451-4541, mass 31267.95
    const int minScanNum = 5236;
    const int maxScanNum = 5286;
    const double featureMass = 8480.3681;

    var tolerance = new Tolerance(10);
    var relaxedTolerance = new Tolerance(20);

    const int minTagLength = 5;
    const int minMergedTagLength = 7;
    const int minNumTagMatches = 1;

    var run = PbfLcMsRun.GetLcMsRun(rawFileName);
    var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();

    // MS2 scans matching the feature mass, restricted to scans strictly inside the feature's scan range
    var filter = new Ms1FtFilter(run, tolerance, featureFileName);
    var ms2ScanNums =
        filter.GetMatchingMs2ScanNums(featureMass)
            .Where(scanNum => scanNum > minScanNum && scanNum < maxScanNum)
            .ToArray();

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    // Protein name -> all tags of the feature's MS2 scans that match that protein
    var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();
    foreach (var ms2ScanNum in ms2ScanNums)
    {
        var tags = tagParser.GetSequenceTags(ms2ScanNum);
        if (tags == null)
        {
            // No tags for this scan
            continue;
        }

        foreach (var tag in tags)
        {
            var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
            foreach (var index in matchedIndices)
            {
                var protein = fastaDb.GetProteinName(index);
                var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                var matchedTag = new MatchedTag(tag, startIndex, featureMass);

                IList<MatchedTag> existingTags;
                if (proteinsToTags.TryGetValue(protein, out existingTags))
                {
                    existingTags.Add(matchedTag);
                }
                else
                {
                    proteinsToTags.Add(protein, new List<MatchedTag> { matchedTag });
                }
            }
        }
    }

    // Only the protein with the most matched tags is reported (the loop exits after the first entry)
    foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
    {
        if (entry.Value.Count < minNumTagMatches)
        {
            break;
        }

        var proteinName = entry.Key;
        var proteinSequence = fastaDb.GetProteinSequence(proteinName);
        var protein = new Sequence(proteinSequence, aminoAcidSet);
        Console.WriteLine(proteinName + "\t" + entry.Value.Count);

        var matchedTagSet = new MatchedTagSet(proteinSequence, aminoAcidSet, tolerance, relaxedTolerance);

        Console.WriteLine("********** Before merging");
        foreach (var matchedTag in entry.Value)
        {
            var seq = proteinSequence.Substring(matchedTag.StartIndex, matchedTag.EndIndex - matchedTag.StartIndex);
            var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
            var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

            // Columns: N-term mass difference, tag sequence, C-term mass difference, start index, reliability flags
            Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);

            matchedTagSet.Add(matchedTag);
        }

        Console.WriteLine("********** After merging");
        foreach (var matchedTag in matchedTagSet.Tags)
        {
            if (matchedTag.Length < minMergedTagLength)
            {
                continue;
            }

            var seq = proteinSequence.Substring(matchedTag.StartIndex, matchedTag.EndIndex - matchedTag.StartIndex);
            var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
            var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

            Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
        }

        break;
    }
}
/// <summary>
/// Find features in the data file
/// </summary>
/// <param name="rawFile">
/// Data file (either a pbf file or a file type from which a pbf file can be auto-created).
/// May also be the path of a folder-type dataset, in which case the path refers to a directory.
/// </param>
/// <returns>
/// 0 if success; negative number on error:
/// -1 if no output directory could be determined,
/// -2 if the ProMex output file already exists,
/// -3 if the input file (or dataset folder) cannot be found,
/// -4 if the data file has no MS1 spectra,
/// -5 if the data file has no MS1 peaks
/// </returns>
private int ProcessFile(string rawFile)
{
    var outDirectory = GetOutputDirectory(rawFile);
    if (string.IsNullOrEmpty(outDirectory))
    {
        return -1;
    }

    // All output files share the dataset's base name (input name with its extension removed)
    var baseName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
    var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
    var outCsvFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
    var pngFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

    // Never overwrite an existing result file
    if (File.Exists(ms1FeaturesFilePath))
    {
        ShowErrorMessage("ProMex output already exists: " + ms1FeaturesFilePath);
        return -2;
    }

    // Folder-type datasets are directories, for which File.Exists always returns false;
    // accept an existing directory as well so that those datasets are not rejected here
    if (!File.Exists(rawFile) && !Directory.Exists(rawFile))
    {
        ShowErrorMessage("Cannot find input file: " + rawFile);
        return -3;
    }

    var stopwatch = Stopwatch.StartNew();
    Console.WriteLine("Start loading MS1 data from {0}", rawFile);

    var run = PbfLcMsRun.GetLcMsRun(rawFile);

    // NOTE(review): 1 and 60 are presumably the minimum and maximum charge states considered -- confirm against LcMsPeakMatrix
    var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);
    Console.WriteLine("Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    if (run.GetMs1ScanVector().Length == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
        return -4;
    }

    if (featureFinder.Ms1PeakCount == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 peaks: " + Path.GetFileName(rawFile));
        return -5;
    }

    var comparer = featureFinder.Comparer;
    var container = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));

    // Translate the requested mass range into the range of mass bins to search
    var minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
    var maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);

    // Kept as a double so that the percentage computation below uses floating-point division
    double totalMassBin = maxSearchMassBin - minSearchMassBin + 1;

    Console.WriteLine("Start MS1 feature extraction.");
    stopwatch.Restart();

    for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
    {
        var clusters = featureFinder.FindFeatures(binNum);
        container.Add(clusters);

        // Report progress once every 1000 bins (but not for the very first bin)
        if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
        {
            var elapsed = (stopwatch.ElapsedMilliseconds) / 1000.0d;
            var processedBins = binNum - minSearchMassBin;
            var processedPercentage = processedBins / totalMassBin * 100;
            Console.WriteLine("Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}",
                processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed, container.NumberOfFeatures);
        }
    }

    Console.WriteLine("Complete MS1 feature extraction.");
    Console.WriteLine(" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(" - Number of extracted features = {0}", container.NumberOfFeatures);
    Console.WriteLine("Start selecting mutually independent features from feature network graph");
    stopwatch.Restart();

    // The value returned by FilterAndOutputFeatures is reported below as the number of filtered features
    var featureId = FilterAndOutputFeatures(container, featureFinder, outCsvFilePath, ms1FeaturesFilePath);

    Console.WriteLine("Complete feature filtration");
    Console.WriteLine(" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(" - Number of filtered features = {0}", featureId);
    Console.WriteLine(" - ProMex output: {0}", ms1FeaturesFilePath);

    if (Parameters.CsvOutput)
    {
        Console.WriteLine(" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
    }

    if (Parameters.FeatureMapImage)
    {
        CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
    }

    return 0;
}