/// <summary>
/// Prints several paths derived from a hard-coded raw file path and makes sure
/// the hard-coded output directory exists, creating it when necessary
/// </summary>
/// <remarks>
/// The test is ignored (via Assert.Ignore) when the raw file is not present on this machine
/// </remarks>
public void TestPathUtils()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    Console.WriteLine(MassSpecDataReaderFactory.RemoveExtension(rawFilePath) + "_Target.tsv");
    Console.WriteLine(Path.GetDirectoryName(rawFilePath));
    Console.WriteLine(Path.Combine(Path.GetDirectoryName(rawFilePath), Path.GetFileNameWithoutExtension(rawFilePath) + "_IcTarget.tsv"));

    var outputDir = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\L1_1_Mode2\Synocho_L1_1_IcTarget.tsv";
    if (!Directory.Exists(outputDir))
    {
        // File.Exists is used here rather than File.GetAttributes: GetAttributes throws when
        // nothing exists at the path, which would make the CreateDirectory call below unreachable
        if (File.Exists(outputDir))
        {
            throw new Exception(outputDir + " is not a directory!");
        }

        Directory.CreateDirectory(outputDir);
    }

    Console.WriteLine(outputDir);
}
/// <summary>
/// Render previously found MS1 features to a PNG image
/// </summary>
/// <param name="pbfFilePath">Path to the .pbf data file</param>
/// <param name="ms1FeaturesFilePath">
/// Path to the MS1 features file; when null, empty, or ".", the path is built from
/// the output directory, the .pbf base name, and FileExtension
/// </param>
/// <returns>
/// 0 on success; -1 if the data file does not exist; -2 if GetOutputDirectory returns
/// null or empty; -3 if the MS1 features file does not exist
/// </returns>
/// <remarks>
/// The image is written next to the features output as BaseName_FileExtension.png
/// </remarks>
public int CreateFeatureMapImage(string pbfFilePath, string ms1FeaturesFilePath)
{
    var dataFileFound = File.Exists(pbfFilePath);
    if (!dataFileFound)
    {
        Console.WriteLine(@"Error: Data file not found: " + pbfFilePath);
        return -1;
    }

    var outputFolder = GetOutputDirectory(pbfFilePath);
    if (string.IsNullOrEmpty(outputFolder))
    {
        return -2;
    }

    var pathWithoutExtension = MassSpecDataReaderFactory.RemoveExtension(pbfFilePath);
    var datasetName = Path.GetFileName(pathWithoutExtension);

    // An empty path or "." means "use the default features file for this dataset"
    var useDefaultFeaturesFile = string.IsNullOrEmpty(ms1FeaturesFilePath) ||
                                 string.Equals(ms1FeaturesFilePath, ".");
    if (useDefaultFeaturesFile)
    {
        var defaultFeaturesFileName = datasetName + "." + FileExtension;
        ms1FeaturesFilePath = Path.Combine(outputFolder, defaultFeaturesFileName);
    }

    var imageFileName = datasetName + "_" + FileExtension + ".png";
    var imageFilePath = Path.Combine(outputFolder, imageFileName);

    if (File.Exists(ms1FeaturesFilePath))
    {
        Console.WriteLine(@"Start loading MS1 data from {0}", pbfFilePath);
        var lcmsRun = PbfLcMsRun.GetLcMsRun(pbfFilePath);
        CreateFeatureMapImage(lcmsRun, ms1FeaturesFilePath, imageFilePath);
        return 0;
    }

    Console.WriteLine(@"Error: MS1 features file not found: " + ms1FeaturesFilePath);
    return -3;
}
/// <summary>
/// Find features in the data file
/// </summary>
/// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
/// <returns>
/// 0 if success; negative number on error:
/// -1 if GetOutputDirectory returns null or empty,
/// -2 if the features output file already exists,
/// -3 if the input file does not exist,
/// -4 if the data file has no MS1 spectra
/// </returns>
private int ProcessFile(string rawFile)
{
    var outDirectory = GetOutputDirectory(rawFile);
    if (string.IsNullOrEmpty(outDirectory))
    {
        return -1;
    }

    var baseName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
    var ms1FeaturesFilePath = Path.Combine(outDirectory, baseName + "." + FileExtension);
    var outCsvFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".csv");
    var pngFilePath = Path.Combine(outDirectory, baseName + "_" + FileExtension + ".png");

    if (File.Exists(ms1FeaturesFilePath))
    {
        Console.WriteLine(@"ProMex output already exists: {0}", ms1FeaturesFilePath);
        return -2;
    }

    if (!File.Exists(rawFile))
    {
        ShowErrorMessage(@"Cannot find input file: " + rawFile);
        return -3;
    }

    var stopwatch = Stopwatch.StartNew();
    Console.WriteLine(@"Start loading MS1 data from {0}", rawFile);
    var run = PbfLcMsRun.GetLcMsRun(rawFile);
    var featureFinder = new LcMsPeakMatrix(run, _likelihoodScorer, 1, 60, Parameters.MaxThreads);
    Console.WriteLine(@"Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    if (run.GetMs1ScanVector().Length == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 spectra: " + Path.GetFileName(rawFile));
        return -4;
    }

    var comparer = featureFinder.Comparer;
    var container = new LcMsFeatureContainer(featureFinder.Ms1Spectra, _likelihoodScorer, new LcMsFeatureMergeComparer(new Tolerance(10)));
    var minSearchMassBin = comparer.GetBinNumber(Parameters.MinSearchMass);
    var maxSearchMassBin = comparer.GetBinNumber(Parameters.MaxSearchMass);
    double totalMassBin = maxSearchMassBin - minSearchMassBin + 1;

    Console.WriteLine(@"Start MS1 feature extraction.");
    stopwatch.Restart();
    for (var binNum = minSearchMassBin; binNum <= maxSearchMassBin; binNum++)
    {
        var clusters = featureFinder.FindFeatures(binNum);
        container.Add(clusters);

        // Report progress every 1000 bins (but not for the very first bin)
        if (binNum > minSearchMassBin && (binNum - minSearchMassBin) % 1000 == 0)
        {
            var elapsed = (stopwatch.ElapsedMilliseconds) / 1000.0d;
            var processedBins = binNum - minSearchMassBin;
            var processedPercentage = ((double)processedBins / totalMassBin) * 100;
            Console.WriteLine(@"Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}", processedPercentage, featureFinder.Comparer.GetMzEnd(binNum), elapsed, container.NumberOfFeatures);
        }
    }

    Console.WriteLine(@"Complete MS1 feature extraction.");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(@" - Number of extracted features = {0}", container.NumberOfFeatures);
    Console.WriteLine(@"Start selecting mutually independent features from feature network graph");
    stopwatch.Restart();

    // Read the option once so that writer creation and the final report agree
    var writeCsv = Parameters.CsvOutput;
    var featureId = 0;

    // write result files
    // The using blocks guarantee both files are flushed and closed even when
    // filtering or writing throws part-way through
    using (var tsvWriter = new StreamWriter(ms1FeaturesFilePath))
    {
        tsvWriter.WriteLine(GetHeaderString(Parameters.ScoreReport));

        // A null resource is allowed in a using statement; it is simply not disposed
        using (var csvWriter = writeCsv ? new StreamWriter(outCsvFilePath) : null)
        {
            if (csvWriter != null)
            {
                csvWriter.WriteLine("scan_num,charge,abundance,mz,fit,monoisotopic_mw,FeatureID");
            }

            var filteredFeatures = container.GetFilteredFeatures(featureFinder);
            foreach (var feature in filteredFeatures)
            {
                featureId++;
                tsvWriter.WriteLine("{0}\t{1}", featureId, GetString(feature, Parameters.ScoreReport));

                var mostAbuIdx = feature.TheoreticalEnvelope.IndexOrderByRanking[0];
                if (csvWriter == null)
                {
                    continue;
                }

                foreach (var envelope in feature.EnumerateEnvelopes())
                {
                    var mostAbuPeak = envelope.Peaks[mostAbuIdx];
                    if (mostAbuPeak == null || !mostAbuPeak.Active)
                    {
                        continue;
                    }

                    var fitscore = 1.0 - feature.BestCorrelationScore;
                    csvWriter.WriteLine("{0},{1},{2},{3},{4},{5},{6}", envelope.ScanNum, envelope.Charge, envelope.Abundance, mostAbuPeak.Mz, fitscore, envelope.MonoMass, featureId);
                }
            }
        }
    }

    Console.WriteLine(@"Complete feature filtration");
    Console.WriteLine(@" - Elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(@" - Number of filtered features = {0}", featureId);
    Console.WriteLine(@" - ProMex output: {0}", ms1FeaturesFilePath);

    if (writeCsv)
    {
        Console.WriteLine(@" - ProMex output in ICR2LS format: {0}", outCsvFilePath);
    }

    if (Parameters.FeatureMapImage)
    {
        CreateFeatureMapImage(run, ms1FeaturesFilePath, pngFilePath);
    }

    return 0;
}
/// <summary>
/// Run the database search: load the spectra, build the MS1 filter, deconvolute the MS/MS spectra,
/// then search the target and/or decoy databases and write the result files
/// </summary>
/// <param name="corrThreshold">Not referenced in this method body</param>
/// <param name="cancellationToken">Optional token passed to the parallel deconvolution step; CancellationToken.None is used when null</param>
/// <param name="progress">Optional progress reporter; when null, progress from sub-steps is not forwarded</param>
/// <returns>True on success; false if the FDR calculator reports an error (ErrorMessage is set in that case)</returns>
public bool RunSearch(double corrThreshold = 0.7, CancellationToken? cancellationToken = null, IProgress<ProgressData> progress = null)
{
    // Get the Normalized spec file/folder path
    SpecFilePath = MassSpecDataReaderFactory.NormalizeDatasetPath(SpecFilePath);

    var prog = new Progress<ProgressData>();
    var progData = new ProgressData(progress);
    if (progress != null)
    {
        // Relay progress reported by sub-steps through progData
        prog = new Progress<ProgressData>(p =>
        {
            progData.Status = p.Status;
            progData.StatusInternal = p.StatusInternal;
            progData.Report(p.Percent);
        });
    }

    var sw = new Stopwatch();
    var swAll = new Stopwatch();
    swAll.Start();
    ErrorMessage = string.Empty;

    Console.Write(@"Reading raw file...");
    progData.Status = "Reading spectra file";
    progData.StepRange(10.0);
    sw.Start();

    _run = PbfLcMsRun.GetLcMsRun(SpecFilePath, 0, 0, prog);

    _ms2ScanNums = _run.GetScanNumbers(2).ToArray();
    _isolationWindowTargetMz = new double[_run.MaxLcScan + 1];
    foreach (var ms2Scan in _ms2ScanNums)
    {
        var ms2Spec = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
        if (ms2Spec == null)
        {
            continue;
        }

        _isolationWindowTargetMz[ms2Scan] = ms2Spec.IsolationWindow.IsolationWindowTargetMz;
    }

    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    progData.StepRange(20.0);

    ISequenceFilter ms1Filter;
    if (this.ScanNumbers != null && this.ScanNumbers.Any())
    {
        ms1Filter = new SelectedMsMsFilter(this.ScanNumbers);
    }
    else if (string.IsNullOrWhiteSpace(FeatureFilePath))
    {
        // Checks whether SpecFileName.ms1ft exists
        var ms1FtFilePath = MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, LcMsFeatureFinderLauncher.FileExtension);
        if (!File.Exists(ms1FtFilePath))
        {
            Console.WriteLine(@"Running ProMex...");
            sw.Start();
            var param = new LcMsFeatureFinderInputParameter
            {
                InputPath = SpecFilePath,
                MinSearchMass = MinSequenceMass,
                MaxSearchMass = MaxSequenceMass,
                MinSearchCharge = MinPrecursorIonCharge,
                MaxSearchCharge = MaxPrecursorIonCharge,
                CsvOutput = false,
                ScoreReport = false,
                LikelihoodScoreThreshold = -10
            };
            var featureFinder = new LcMsFeatureFinderLauncher(param);
            featureFinder.Run();
        }

        sw.Reset();
        sw.Start();
        Console.Write(@"Reading ProMex results...");
        ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, ms1FtFilePath, -10);
    }
    else
    {
        sw.Reset();
        sw.Start();

        // Ordinal, case-insensitive comparison: lower-casing with the current culture
        // can fail to match (e.g. 'I' does not map to 'i' under a Turkish culture)
        var extension = Path.GetExtension(FeatureFilePath);
        if (string.Equals(extension, ".csv", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write(@"Reading ICR2LS/Decon2LS results...");
            ms1Filter = new IsosFilter(_run, PrecursorIonTolerance, FeatureFilePath);
        }
        else if (string.Equals(extension, ".ms1ft", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write(@"Reading ProMex results...");
            ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, FeatureFilePath, -10);
        }
        else if (string.Equals(extension, ".msalign", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write(@"Reading MS-Align+ results...");
            ms1Filter = new MsDeconvFilter(_run, PrecursorIonTolerance, FeatureFilePath);
        }
        else
        {
            // Unrecognized extension: ms1Filter is left null
            ms1Filter = null; //new Ms1FeatureMatrix(_run);
        }
    }

    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

    // pre-generate deconvoluted spectra for scoring
    _massBinComparer = new FilteredProteinMassBinning(AminoAcidSet, MaxSequenceMass + 1000);
    _ms2ScorerFactory2 = new CompositeScorerFactory(_run, _massBinComparer, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);

    sw.Reset();
    Console.WriteLine(@"Generating deconvoluted spectra for MS/MS spectra...");
    sw.Start();
    var pfeOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = MaxNumThreads,
        CancellationToken = cancellationToken ?? CancellationToken.None
    };
    Parallel.ForEach(_ms2ScanNums, pfeOptions, ms2ScanNum =>
    {
        _ms2ScorerFactory2.DeconvonluteProductSpectrum(ms2ScanNum);
    });
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

    progData.StepRange(10.0);
    progData.Status = "Reading Fasta File";

    // Target database
    var targetDb = new FastaDatabase(DatabaseFilePath);
    targetDb.Read();

    // Generate sequence tags for all MS/MS spectra
    if (TagBasedSearch)
    {
        progData.StepRange(25.0);
        progData.Status = "Generating Sequence Tags";
        sw.Reset();
        Console.WriteLine(@"Generating sequence tags for MS/MS spectra...");
        sw.Start();
        var seqTagGen = GetSequenceTagGenerator();
        _tagMs2ScanNum = seqTagGen.GetMs2ScanNumsContainingTags().ToArray();
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        _tagSearchEngine = new ScanBasedTagSearchEngine(_run, seqTagGen, new LcMsPeakMatrix(_run, ms1Filter), targetDb, ProductIonTolerance, AminoAcidSet, _ms2ScorerFactory2, ScanBasedTagSearchEngine.DefaultMinMatchedTagLength, MaxSequenceMass, MinProductIonCharge, MaxProductIonCharge);
    }

    var specFileName = MassSpecDataReaderFactory.RemoveExtension(Path.GetFileName(SpecFilePath));
    var targetOutputFilePath = Path.Combine(OutputDir, specFileName + TargetFileNameEnding);
    var decoyOutputFilePath = Path.Combine(OutputDir, specFileName + DecoyFileNameEnding);
    var tdaOutputFilePath = Path.Combine(OutputDir, specFileName + TdaFileNameEnding);

    progData.StepRange(60.0);
    progData.Status = "Running Target search";

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
    {
        sw.Reset();
        Console.Write(@"Reading the target database...");
        sw.Start();
        targetDb.Read();
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // One slot per scan number, indexed 0..MaxLcScan
        var targetMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];

        progData.MaxPercentage = 42.5;
        if (TagBasedSearch)
        {
            sw.Reset();
            Console.WriteLine(@"Tag-based searching the target database");
            sw.Start();
            RunTagBasedSearch(targetMatches, targetDb, null, prog);
            Console.WriteLine(@"Target database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        }

        progData.MaxPercentage = 60.0;

        sw.Reset();
        Console.WriteLine(@"Searching the target database");
        sw.Start();
        RunSearch(targetMatches, targetDb, ms1Filter, null, prog);
        Console.WriteLine(@"Target database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // calculate spectral e-value using generating function
        sw.Reset();
        Console.WriteLine(@"Calculating spectral E-values for target-spectrum matches");
        sw.Start();
        var bestTargetMatches = RunGeneratingFunction(targetMatches);
        WriteResultsToFile(bestTargetMatches, targetOutputFilePath, targetDb);
        sw.Stop();
        Console.WriteLine(@"Target-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    }

    progData.StepRange(95.0); // total to 95%
    progData.Status = "Running Decoy search";

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
    {
        // Decoy database
        sw.Reset();
        sw.Start();
        var decoyDb = targetDb.Decoy(null, true);
        Console.Write(@"Reading the decoy database...");
        decoyDb.Read();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        progData.MaxPercentage = 77.5;

        // One slot per scan number, indexed 0..MaxLcScan
        var decoyMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];
        if (TagBasedSearch)
        {
            sw.Reset();
            Console.WriteLine(@"Tag-based searching the decoy database");
            sw.Start();
            RunTagBasedSearch(decoyMatches, decoyDb, null, prog);
            Console.WriteLine(@"Decoy database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        }

        progData.MaxPercentage = 95.0;

        sw.Reset();
        Console.WriteLine(@"Searching the decoy database");
        sw.Start();
        RunSearch(decoyMatches, decoyDb, ms1Filter, null, prog);
        Console.WriteLine(@"Decoy database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // calculate spectral e-value using generating function
        sw.Reset();
        Console.WriteLine(@"Calculating spectral E-values for decoy-spectrum matches");
        sw.Start();
        var bestDecoyMatches = RunGeneratingFunction(decoyMatches);
        WriteResultsToFile(bestDecoyMatches, decoyOutputFilePath, decoyDb);
        sw.Stop();
        Console.WriteLine(@"Decoy-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    }

    progData.StepRange(100.0);
    progData.Status = "Writing combined results file";

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
    {
        // Add "Qvalue" and "PepQValue"
        var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
        if (fdrCalculator.HasError())
        {
            ErrorMessage = fdrCalculator.ErrorMessage;
            Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
            return false;
        }

        fdrCalculator.WriteTo(tdaOutputFilePath);
    }

    progData.Report(100.0);

    Console.WriteLine(@"Done.");
    swAll.Stop();
    Console.WriteLine(@"Total elapsed time for search: {0:f1} sec ({1:f2} min)", swAll.Elapsed.TotalSeconds, swAll.Elapsed.TotalMinutes);

    return true;
}
/// <summary>
/// Run MS1 feature finding on a single data file and write the results
/// </summary>
/// <param name="rawFile">Data file (either a pbf file or a file type from which a pbf file can be auto-created)</param>
/// <returns>
/// 0 if success; negative number on error:
/// -1 if GetOutputDirectory returns null or empty,
/// -2 if the features output file already exists,
/// -3 if the input file does not exist,
/// -4 if the data file has no MS1 spectra,
/// -5 if the data file has no MS1 peaks
/// </returns>
private int ProcessFile(string rawFile)
{
    var outputFolder = GetOutputDirectory(rawFile);
    if (string.IsNullOrEmpty(outputFolder))
    {
        return -1;
    }

    // Output file names are all derived from the input file's base name
    var datasetName = Path.GetFileName(MassSpecDataReaderFactory.RemoveExtension(rawFile));
    var featuresPath = Path.Combine(outputFolder, datasetName + "." + FileExtension);
    var csvPath = Path.Combine(outputFolder, datasetName + "_" + FileExtension + ".csv");
    var imagePath = Path.Combine(outputFolder, datasetName + "_" + FileExtension + ".png");

    if (File.Exists(featuresPath))
    {
        ShowErrorMessage("ProMex output already exists: " + featuresPath);
        return -2;
    }

    if (!File.Exists(rawFile))
    {
        ShowErrorMessage("Cannot find input file: " + rawFile);
        return -3;
    }

    var timer = Stopwatch.StartNew();
    Console.WriteLine("Start loading MS1 data from {0}", rawFile);

    var lcmsRun = PbfLcMsRun.GetLcMsRun(rawFile);
    var peakMatrix = new LcMsPeakMatrix(lcmsRun, _likelihoodScorer, 1, 60, Parameters.MaxThreads);
    Console.WriteLine("Complete loading MS1 data. Elapsed Time = {0:0.000} sec", (timer.ElapsedMilliseconds) / 1000.0d);

    var inputFileName = Path.GetFileName(rawFile);
    if (lcmsRun.GetMs1ScanVector().Length == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 spectra: " + inputFileName);
        return -4;
    }

    if (peakMatrix.Ms1PeakCount == 0)
    {
        ShowErrorMessage(@"Data file has no MS1 peaks: " + inputFileName);
        return -5;
    }

    var massComparer = peakMatrix.Comparer;
    var mergeComparer = new LcMsFeatureMergeComparer(new Tolerance(10));
    var featureContainer = new LcMsFeatureContainer(peakMatrix.Ms1Spectra, _likelihoodScorer, mergeComparer);

    var firstBin = massComparer.GetBinNumber(Parameters.MinSearchMass);
    var lastBin = massComparer.GetBinNumber(Parameters.MaxSearchMass);
    double binCount = lastBin - firstBin + 1;

    Console.WriteLine("Start MS1 feature extraction.");
    timer.Restart();

    for (var bin = firstBin; bin <= lastBin; bin++)
    {
        featureContainer.Add(peakMatrix.FindFeatures(bin));

        // Only report progress on every 1000th bin after the first one
        var binsDone = bin - firstBin;
        if (binsDone <= 0 || binsDone % 1000 != 0)
        {
            continue;
        }

        var secondsSoFar = (timer.ElapsedMilliseconds) / 1000.0d;
        var percentDone = binsDone / binCount * 100;
        Console.WriteLine("Processing {0:0.0}% of mass bins ({1:0.0} Da); elapsed time = {2:0.000} sec; # of features = {3}", percentDone, peakMatrix.Comparer.GetMzEnd(bin), secondsSoFar, featureContainer.NumberOfFeatures);
    }

    Console.WriteLine("Complete MS1 feature extraction.");
    Console.WriteLine(" - Elapsed time = {0:0.000} sec", (timer.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(" - Number of extracted features = {0}", featureContainer.NumberOfFeatures);

    Console.WriteLine("Start selecting mutually independent features from feature network graph");
    timer.Restart();

    var filteredFeatureCount = FilterAndOutputFeatures(featureContainer, peakMatrix, csvPath, featuresPath);

    Console.WriteLine("Complete feature filtration");
    Console.WriteLine(" - Elapsed time = {0:0.000} sec", (timer.ElapsedMilliseconds) / 1000.0d);
    Console.WriteLine(" - Number of filtered features = {0}", filteredFeatureCount);
    Console.WriteLine(" - ProMex output: {0}", featuresPath);

    if (Parameters.CsvOutput)
    {
        Console.WriteLine(" - ProMex output in ICR2LS format: {0}", csvPath);
    }

    if (Parameters.FeatureMapImage)
    {
        CreateFeatureMapImage(lcmsRun, featuresPath, imagePath);
    }

    return 0;
}