/// <summary>
/// Runs the tag-based search helper on every .pbf file found in the CompRef data folder.
/// The test is ignored when the data folder, the modifications file or the FASTA file is missing.
/// </summary>
public void TestTagBasedSearchCompRef()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dataSetPath = @"D:\MassSpecFiles\CompRef";
    const string fastaFilePath = @"D:\MassSpecFiles\CompRef\ID_003278_4B4B3CB1.fasta";
    const string modsFilePath = @"D:\MassSpecFiles\CompRef\Mods.txt";

    if (!Directory.Exists(dataSetPath))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, dataSetPath);
    }

    if (!File.Exists(modsFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    // Match on the complete ".pbf" extension with an ordinal comparison: the path rebuilt
    // below always ends in ".pbf", so a name that merely ends in "pbf" would not resolve.
    var datasetNames = Directory.GetFiles(dataSetPath)
        .Where(fileName => fileName.EndsWith(".pbf", StringComparison.Ordinal))
        .Select(fileName => Path.GetFileNameWithoutExtension(fileName))
        .ToList();
    datasetNames.Sort();

    var fastaDb = new FastaDatabase(fastaFilePath);
    var tolerance = new Tolerance(10);
    var aaSet = new AminoAcidSet(modsFilePath);

    foreach (var datasetName in datasetNames)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", dataSetPath, datasetName);
        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        Console.WriteLine("-----------------{0}--------------------", rawFile);
        TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
        Console.WriteLine("-----------------------------------------------------------------------");
    }
}
/// <summary>
/// Runs the tag-based search helper on a single .pbf file with its FASTA database and modifications file.
/// The test is ignored when any of the three input files is missing.
/// </summary>
public void TestTagBasedSearch()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    // Alternative inputs that were used with this test:
    // const string rawFilePath = @"H:\Research\Lewy\raw\Lewy_intact_01.raw";
    // const string rawFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
    // const string rawFilePath = @"H:\Research\Yufeng\TopDownYufeng\raw\yufeng_column_test2.raw";
    // const string rawFilePath = @"H:\Research\Weijun_TopDown\raw\UC4_Intact_plasmaTest_90_6May15_Bane_14-09-01RZ.raw";
    // const string rawFilePath = @"H:\Research\Charles\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";
    // const string rawFilePath = @"D:\MassSpecFiles\60k\NCR_50K_Test_24Jun15_Bane_15-02-02RZ.pbf";
    const string rawFilePath = @"D:\MassSpecFiles\60k\Yufeng_SampleTest1_150614113438.pbf";

    // const string fastaFilePath = @"D:\MassSpecFiles\60k\ID_004973_9BA6912F.fasta";
    const string fastaFilePath = @"D:\MassSpecFiles\60k\ID_003836_DA9CC1E4.fasta";

    // const string modsFilePath = @"H:\Research\QCShew_TopDown\Production\Mods_Methyl.txt";
    const string modsFilePath = @"D:\MassSpecFiles\60k\Mods.txt";

    // Verify every prerequisite before loading anything, so that a missing
    // FASTA or modifications file does not cost a load of the spectrum file first.
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    if (!File.Exists(modsFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var fastaDb = new FastaDatabase(fastaFilePath);
    var tolerance = new Tolerance(10);
    var aaSet = new AminoAcidSet(modsFilePath);

    TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
}
/// <summary>
/// Runs the tag-based search helper on the Lewy .pbf file with its FASTA database and modifications file.
/// The test is ignored when any of the three input files is missing.
/// </summary>
public void TestTagBasedSearchForLewy()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string rawFilePath = @"D:\MassSpecFiles\Lewy\Lewy_AT_AD1_21May15_Bane_14-09-01RZ.pbf";
    const string fastaFilePath = @"D:\MassSpecFiles\Lewy\a4_human.fasta";
    const string modsFilePath = @"D:\MassSpecFiles\Lewy\Mods.txt";

    // Verify every prerequisite before loading anything, so that a missing
    // FASTA or modifications file does not cost a load of the spectrum file first.
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    if (!File.Exists(modsFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var fastaDb = new FastaDatabase(fastaFilePath);
    var tolerance = new Tolerance(10);
    var aaSet = new AminoAcidSet(modsFilePath);

    TestTagBasedSearch(run, fastaDb, tolerance, aaSet);
}
/// <summary>
/// Looks up the sequence tags of one scan in the FASTA database and prints every tag
/// that matches at least one protein, together with the matching protein names.
/// The test is ignored when the tag file or the FASTA file is missing.
/// </summary>
public void TestTagMatchingSingleSpec()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    const int scanNum = 4533;

    // Sequence tag input
    var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
    const int minTagLength = 8;

    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

    if (!File.Exists(tagFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    foreach (var tag in tagParser.GetSequenceTags(scanNum))
    {
        var hitIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence);
        var proteinNames = hitIndices.Select(hit => fastaDb.GetProteinName(hit)).ToArray();

        // Only tags found in at least one protein are reported
        if (proteinNames.Length == 0)
        {
            continue;
        }

        var joinedNames = string.Join("\t", proteinNames);
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", tag.Sequence, tag.IsPrefix, tag.FlankingMass, joinedNames);
    }
}
/// <summary>
/// Reports whether a file exists at the given path once its extension is changed to ".pbf".
/// </summary>
/// <param name="path">Path whose extension is swapped for ".pbf" before the check</param>
/// <returns>True when the resulting .pbf path exists as a file</returns>
private bool HasExistingPbfFile(string path)
{
    var pbfPath = MassSpecDataReaderFactory.ChangeExtension(path, ".pbf");
    return File.Exists(pbfPath);
}
/// <summary>
/// Counts, over all MS2 scans of a dataset, how many spectra have sequence tags, how many have
/// a tag found in the FASTA database, and how many of the latter have no identification at the
/// q-value threshold. The counts are printed to the console.
/// The test is ignored when any input file is missing.
/// </summary>
public void TestTagMatching()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Sequence tags
    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    const int minTagLength = 8;
    var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
    if (!File.Exists(tagFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
    }

    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    // Spectrum file
    var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    if (!File.Exists(rawFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFileName);

    // Identification results
    const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var resultParser = new MsPathFinderParser(resultFilePath);
    const double qValueThreshold = 0.01;
    var confidentIds = resultParser.GetIdWithQValuesNoLargerThan(qValueThreshold);

    // scanHasId[scan] is true when the scan has an identification at the threshold
    var scanHasId = new bool[run.MaxLcScan + 1];
    foreach (var confidentId in confidentIds)
    {
        scanHasId[confidentId.Scan] = true;
    }

    // Protein database
    // const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);

    var ms2Count = 0;
    var taggedCount = 0;
    var matchedCount = 0;
    var matchedWithoutIdCount = 0;

    foreach (var scan in run.GetScanNumbers(2))
    {
        ++ms2Count;

        var scanTags = tagParser.GetSequenceTags(scan);
        if (scanTags == null)
        {
            continue;
        }

        ++taggedCount;

        // One tag present in the database is enough; the search stops at the first hit
        var hasDatabaseHit = scanTags.Any(candidate => searchableDb.Search(candidate.Sequence) >= 0);
        if (!hasDatabaseHit)
        {
            continue;
        }

        ++matchedCount;
        if (!scanHasId[scan])
        {
            ++matchedWithoutIdCount;
        }
    }

    Console.WriteLine("Tag length: {0}", minTagLength);
    Console.WriteLine("NumMs2Spectra: {0}", ms2Count);
    Console.WriteLine("NumMs2SpectraWithTags: {0} ({1})", taggedCount, taggedCount / (float)ms2Count);
    Console.WriteLine("NumMs2SpectraWithMatchingTags: {0} ({1})", matchedCount, matchedCount / (float)ms2Count);
    Console.WriteLine("NumMs2SpectraWithMatchingTagsWithNoId: {0} ({1})", matchedWithoutIdCount, matchedWithoutIdCount / (float)ms2Count);
}
/// <summary>
/// For every MS1 feature in the .ms1ft file, counts whether it has a matching identification,
/// MS2 scans, sequence tags, and sequence tags found in the FASTA database, then prints the
/// totals followed by the scan numbers of identifications no feature accounted for.
/// The test is ignored when any input file is missing.
/// </summary>
public void TestFeatureIdMatching()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var resultParser = new MsPathFinderParser(resultFilePath);
    const double qValueThreshold = 0.01;
    const double tolerancePpm = 13;

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    if (!File.Exists(rawFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFileName);

    // Identifications sorted by mass so that BinarySearch can locate the mass window of a feature.
    // NOTE(review): TakeWhile stops at the first id above the threshold, which presumes
    // GetIdList() is ordered by q-value - confirm.
    var idList = resultParser.GetIdList().TakeWhile(id => id.QValue <= qValueThreshold).OrderBy(id => id.Mass).ToList();
    var idMassList = idList.Select(id => id.Mass).ToList();
    var idFlag = new bool[idList.Count];

    // Parse sequence tags
    var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
    const int minTagLength = 6;
    const int numProtMatches = 4;

    // const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";

    if (!File.Exists(tagFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    // The feature file is an input like the others; skip instead of failing when it is absent
    var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
    if (!File.Exists(featureFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, featureFileName);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    var featureParser = new TsvFileParser(featureFileName);
    var minScan = featureParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
    var maxScan = featureParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
    var minCharge = featureParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var maxCharge = featureParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var monoMass = featureParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();

    var numFeaturesWithId = 0;
    var numFeaturesWithMs2 = 0;
    var numFeaturesWithTags = 0;
    var numFeaturesWithMatchingTags = 0;
    var numFeaturesWithTwoOrMoreMatchingTags = 0;
    var numFeaturesWithNoIdAndMatchingTags = 0;

    var massTolerance = new Tolerance(tolerancePpm);

    for (var i = 0; i < featureParser.NumData; i++)
    {
        var mass = monoMass[i];
        var featureMinScan = minScan[i];
        var featureMaxScan = maxScan[i];
        var featureMinCharge = minCharge[i];
        var featureMaxCharge = maxCharge[i];

        // Find Id
        var tolDa = massTolerance.GetToleranceAsDa(mass, 1);
        var minMass = mass - tolDa;
        var maxMass = mass + tolDa;

        // A negative BinarySearch result is the bitwise complement of the insertion point
        var index = idMassList.BinarySearch(mass);
        if (index < 0)
        {
            index = ~index;
        }

        // One predicate for both search directions: scan and charge ranges are inclusive,
        // matching the inclusive scan loop used below to collect MS2 scans.
        Func<MsPathFinderId, bool> isInsideFeature = candidate =>
            candidate.Scan >= featureMinScan && candidate.Scan <= featureMaxScan &&
            candidate.Charge >= featureMinCharge && candidate.Charge <= featureMaxCharge;

        var matchedId = new List<MsPathFinderId>();

        // go down
        var curIndex = index - 1;
        while (curIndex >= 0)
        {
            var curId = idList[curIndex];
            if (curId.Mass < minMass)
            {
                break;
            }

            if (isInsideFeature(curId))
            {
                matchedId.Add(curId);
                idFlag[curIndex] = true;
            }

            --curIndex;
        }

        // go up
        curIndex = index;
        while (curIndex < idList.Count)
        {
            var curId = idList[curIndex];
            if (curId.Mass > maxMass)
            {
                break;
            }

            if (isInsideFeature(curId))
            {
                matchedId.Add(curId);
                idFlag[curIndex] = true;
            }

            ++curIndex;
        }

        var hasId = false;
        if (matchedId.Any())
        {
            ++numFeaturesWithId;
            hasId = true;
        }

        // Find MS2 scans whose isolation window contains the most abundant isotope of this feature
        var tags = new List<SequenceTag>();
        var hasMs2 = false;
        for (var scanNum = featureMinScan; scanNum <= featureMaxScan; scanNum++)
        {
            var isolationWindow = run.GetIsolationWindow(scanNum);
            if (isolationWindow == null)
            {
                continue;
            }

            var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
            var charge = (int)Math.Round(mass / isolationWindowTargetMz);
            if (charge < featureMinCharge || charge > featureMaxCharge)
            {
                continue;
            }

            var mz = Ion.GetIsotopeMz(mass, charge, Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex);
            if (isolationWindow.Contains(mz))
            {
                // GetSequenceTags is treated as nullable elsewhere in these tests;
                // List.AddRange throws on a null argument, so guard it here.
                var scanTags = tagParser.GetSequenceTags(scanNum);
                if (scanTags != null)
                {
                    tags.AddRange(scanTags);
                }

                hasMs2 = true;
            }
        }

        if (hasMs2)
        {
            ++numFeaturesWithMs2;
        }

        if (tags.Any())
        {
            ++numFeaturesWithTags;
        }

        // Number of matching tags per protein name
        var protHist = new Dictionary<string, int>();
        var hasMatchedTag = false;
        foreach (var tag in tags)
        {
            var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).Select(idx => fastaDb.GetProteinName(idx)).ToArray();
            if (matchedProteins.Length == 0)
            {
                continue;
            }

            hasMatchedTag = true;
            foreach (var protein in matchedProteins)
            {
                int num;
                if (protHist.TryGetValue(protein, out num))
                {
                    protHist[protein] = num + 1;
                }
                else
                {
                    protHist[protein] = 1;
                }
            }
        }

        if (hasMatchedTag)
        {
            ++numFeaturesWithMatchingTags;
            if (!hasId)
            {
                ++numFeaturesWithNoIdAndMatchingTags;
            }
        }

        if (protHist.Any())
        {
            var maxOcc = protHist.Values.Max();
            if (maxOcc >= numProtMatches)
            {
                ++numFeaturesWithTwoOrMoreMatchingTags;
            }
        }
    }

    Console.WriteLine("NumFeatures: {0}", featureParser.NumData);
    Console.WriteLine("NumId: {0}", idList.Count);
    Console.WriteLine("NumFeaturesWithId: {0} ({1})", numFeaturesWithId, numFeaturesWithId / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMs2: {0} ({1})", numFeaturesWithMs2, numFeaturesWithMs2 / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithTag: {0} ({1})", numFeaturesWithTags, numFeaturesWithTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMatchedTag: {0} ({1})", numFeaturesWithMatchingTags, numFeaturesWithMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMoreThanOneMatchedTag: {0} ({1})", numFeaturesWithTwoOrMoreMatchingTags, numFeaturesWithTwoOrMoreMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithNoIdAndMatchedTag: {0} ({1})", numFeaturesWithNoIdAndMatchingTags, numFeaturesWithNoIdAndMatchingTags / (float)featureParser.NumData);

    // Scan numbers of identifications that no feature accounted for
    for (var i = 0; i < idFlag.Length; i++)
    {
        if (!idFlag[i])
        {
            Console.WriteLine(idList[i].Scan);
        }
    }
}
/// <summary>
/// For one hard-coded MS1 feature (mass and scan range), collects the sequence tags of its
/// matching MS2 scans, groups the database matches by protein, and prints the tags of the
/// protein with the most matches before and after merging them in a MatchedTagSet.
/// The test is ignored when any input file is missing.
/// </summary>
public void TestFeatureId()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    const string tagFileName = dataSet + ".seqtag"; //"_MinLength3.seqtag"; //Path.ChangeExtension(dataSet, ".seqtag");
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

    var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");

    // dataSet is an extension-less base path, so it is the derived input files that must exist
    foreach (var requiredFile in new[] { rawFileName, featureFileName, tagFileName, fastaFilePath })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
        }
    }

    // Feature: 5236-5286 6-12 8480.3681 5
    const int minScanNum = 5236;
    const int maxScanNum = 5286;
    const double featureMass = 8480.3681;

    //const int minScanNum = 7251;
    //const int maxScanNum = 7326;
    //const double featureMass = 32347.18;

    // const int minScanNum = 4451;
    // const int maxScanNum = 4541;
    // const double featureMass = 31267.95;

    var tolerance = new Tolerance(10);
    var relaxedTolerance = new Tolerance(20);

    const int minTagLength = 5;
    const int minMergedTagLength = 7;
    const int minNumTagMatches = 1;

    var run = PbfLcMsRun.GetLcMsRun(rawFileName);
    var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();

    var filter = new Ms1FtFilter(run, tolerance, featureFileName);
    var ms2ScanNums =
        filter.GetMatchingMs2ScanNums(featureMass)
            .Where(scanNum => scanNum > minScanNum && scanNum < maxScanNum)
            .ToArray();

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    // Protein name -> every tag of this feature that matched somewhere in that protein
    var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();
    foreach (var ms2ScanNum in ms2ScanNums)
    {
        var tags = tagParser.GetSequenceTags(ms2ScanNum);
        if (tags == null)
        {
            // GetSequenceTags is treated as nullable elsewhere in these tests
            continue;
        }

        foreach (var tag in tags)
        {
            var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
            foreach (var index in matchedIndices)
            {
                var protein = fastaDb.GetProteinName(index);
                var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                var matchedTag = new MatchedTag(tag, startIndex, featureMass);

                IList<MatchedTag> existingTags;
                if (proteinsToTags.TryGetValue(protein, out existingTags))
                {
                    existingTags.Add(matchedTag);
                }
                else
                {
                    proteinsToTags.Add(protein, new List<MatchedTag> { matchedTag });
                }
            }
        }
    }

    foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
    {
        if (entry.Value.Count < minNumTagMatches)
        {
            break;
        }

        var proteinName = entry.Key;
        var proteinSequence = fastaDb.GetProteinSequence(proteinName);
        var protein = new Sequence(proteinSequence, aminoAcidSet);
        Console.WriteLine(proteinName + "\t" + entry.Value.Count);

        var matchedTagSet = new MatchedTagSet(proteinSequence, aminoAcidSet, tolerance, relaxedTolerance);

        Console.WriteLine("********** Before merging");
        foreach (var matchedTag in entry.Value)
        {
            var seq = proteinSequence.Substring(matchedTag.StartIndex, matchedTag.EndIndex - matchedTag.StartIndex);
            var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
            var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
            Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);

            matchedTagSet.Add(matchedTag);
        }

        Console.WriteLine("********** After merging");
        foreach (var matchedTag in matchedTagSet.Tags)
        {
            if (matchedTag.Length < minMergedTagLength)
            {
                continue;
            }

            var seq = proteinSequence.Substring(matchedTag.StartIndex, matchedTag.EndIndex - matchedTag.StartIndex);
            var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
            var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
            Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
        }

        // Only the protein with the most matched tags is reported
        break;
    }
}
/// <summary>
/// Writes the search parameters as tab-separated "name / value" lines to a file in OutputDir.
/// The file is named after the spectrum file, with ParameterFileExtension appended.
/// </summary>
public void Write()
{
    var parameterFileName = Path.GetFileNameWithoutExtension(SpecFilePath) + ParameterFileExtension;
    var outputFilePath = Path.Combine(OutputDir, parameterFileName);

    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("SpecFile\t" + Path.GetFileName(SpecFilePath));
        writer.WriteLine("DatabaseFile\t" + Path.GetFileName(DatabaseFilePath));

        // Without an explicit feature file, report the .ms1ft name derived from the spectrum file
        string featureFileName;
        if (string.IsNullOrWhiteSpace(FeatureFilePath))
        {
            featureFileName = Path.GetFileName(MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, ".ms1ft"));
        }
        else
        {
            featureFileName = Path.GetFileName(FeatureFilePath);
        }

        writer.WriteLine("FeatureFile\t{0}", featureFileName);

        writer.WriteLine("InternalCleavageMode\t" + InternalCleavageMode);
        writer.WriteLine("Tag-based search\t" + TagBasedSearch);

        // "Both" is written as "Target+Decoy"; every other mode uses its own name
        string tdaText;
        if (TargetDecoySearchMode == DatabaseSearchMode.Both)
        {
            tdaText = "Target+Decoy";
        }
        else
        {
            tdaText = TargetDecoySearchMode.ToString();
        }

        writer.WriteLine("Tda\t" + tdaText);

        writer.WriteLine("PrecursorIonTolerancePpm\t" + PrecursorIonTolerancePpm);
        writer.WriteLine("ProductIonTolerancePpm\t" + ProductIonTolerancePpm);
        writer.WriteLine("MinSequenceLength\t" + MinSequenceLength);
        writer.WriteLine("MaxSequenceLength\t" + MaxSequenceLength);
        writer.WriteLine("MinPrecursorIonCharge\t" + MinPrecursorIonCharge);
        writer.WriteLine("MaxPrecursorIonCharge\t" + MaxPrecursorIonCharge);
        writer.WriteLine("MinProductIonCharge\t" + MinProductIonCharge);
        writer.WriteLine("MaxProductIonCharge\t" + MaxProductIonCharge);
        writer.WriteLine("MinSequenceMass\t" + MinSequenceMass);
        writer.WriteLine("MaxSequenceMass\t" + MaxSequenceMass);
        writer.WriteLine("MaxDynamicModificationsPerSequence\t" + MaxDynamicModificationsPerSequence);

        foreach (var modification in Modifications)
        {
            writer.WriteLine("Modification\t" + modification);
        }
    }
}
/// <summary>
/// Entry point: parses the command line, then creates a .pbf file for the source dataset, or for
/// every *.raw file in the source folder when the source path is a folder that is not itself a dataset.
/// </summary>
/// <param name="args">Command line arguments</param>
/// <returns>
/// 0 on success; -1 when argument parsing or validation fails; -5 when parsing throws;
/// in non-DEBUG builds, a negative code derived from the exception message when processing throws.
/// </returns>
public static int Main(string[] args)
{
    PbfGenInputParameters options;

    try
    {
        var osVersionInfo = new clsOSVersionInfo();

        // Ordinal, case-insensitive match so the check does not depend on the current culture
        if (osVersionInfo.GetOSVersion().IndexOf("windows", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            var handle = Process.GetCurrentProcess().MainWindowHandle;
            SetConsoleMode(handle, EnableExtendedFlags);
        }

        var exeName = System.Reflection.Assembly.GetEntryAssembly().GetName().Name;
        var parser = new CommandLineParser<PbfGenInputParameters>(exeName, Version);
        parser.UsageExamples.Add($"Using -start and -end to limit the scan range to include in the .pbf file\n\t{exeName}.exe -s Dataset.raw -start 2000 -end 3000");

        var results = parser.ParseArgs(args);
        if (!results.Success)
        {
            // Wait for 1.5 seconds
            System.Threading.Thread.Sleep(1500);
            return -1;
        }

        if (!results.ParsedResults.Validate())
        {
            parser.PrintHelp();

            // Wait for 1.5 seconds
            System.Threading.Thread.Sleep(1500);
            return -1;
        }

        options = results.ParsedResults;
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while parsing the command line parameters: " + ex.Message);
        return -5;
    }

#if (!DEBUG)
    try
#endif
    {
        var specFilePaths = new[] { options.SourcePath };
        if (Directory.Exists(options.SourcePath) && !MassSpecDataReaderFactory.IsADirectoryDataset(options.SourcePath))
        {
            specFilePaths = Directory.GetFiles(options.SourcePath, "*.raw"); // TODO: Support folders with other formats in them too...
        }

        foreach (var rawFilePath in specFilePaths)
        {
            var pbfFileName = MassSpecDataReaderFactory.ChangeExtension(rawFilePath, PbfLcMsRun.FileExtensionConst);
            var pbfFilePath = Path.Combine(options.OutputDir, Path.GetFileName(pbfFileName));

            // Skip files that already exist in the current format version
            if (File.Exists(pbfFilePath) && PbfLcMsRun.CheckFileFormatVersion(pbfFilePath, out var isCurrent) && isCurrent)
            {
                Console.WriteLine("{0} already exists.", pbfFilePath);
                continue;
            }

            Console.WriteLine("Creating {0} from {1}", pbfFilePath, rawFilePath);

            if (options.StartScan > 0 && options.EndScan > 0)
            {
                Console.WriteLine("Only including scans {0} to {1}", options.StartScan, options.EndScan);
            }
            else if (options.StartScan > 0)
            {
                Console.WriteLine("Only including scans {0} to the end", options.StartScan);
            }
            else if (options.EndScan > 0)
            {
                Console.WriteLine("Only including scans 1 to {0}", options.EndScan);
            }

            var reader = MassSpecDataReaderFactory.GetMassSpecDataReader(rawFilePath);
            var progress = new Progress<ProgressData>(p =>
            {
                p.UpdateFrequencySeconds = 2;
                if ((p.Percent % 25).Equals(0) || p.ShouldUpdate())
                {
                    Console.Write("\r{0}, {1:00.0}% complete                        ", p.Status, p.Percent);
                }
            });

            // NOTE(review): the instance is not used afterwards; presumably the constructor
            // itself writes the .pbf file - confirm, and dispose it if the type is disposable.
            var run = new PbfLcMsRun(rawFilePath, reader, pbfFilePath, 0, 0, progress, false, options.StartScan, options.EndScan);
            Console.WriteLine();
        }

        Console.WriteLine("PbfFormatVersion: {0}", PbfLcMsRun.FileFormatId);
        return 0;
    }
#if (!DEBUG)
    catch (Exception ex)
    {
        // NOTE: The DMS Analysis Manager looks for this text; do not change it
        Console.WriteLine("Exception while processing: " + ex.Message);
        Console.WriteLine(ex.StackTrace);

        // Math.Abs(int.MinValue) throws OverflowException; int.MinValue is already
        // the negative value that the negated absolute value stands for.
        var messageHash = ex.Message.GetHashCode();
        var errorCode = messageHash == int.MinValue ? int.MinValue : -Math.Abs(messageHash);

        // Never report success (0) from the failure path
        return errorCode == 0 ? -1 : errorCode;
    }
#endif
}
/// <summary>
/// Writes one parameter file per spectrum file into OutputDir, as tab-separated "name / value" lines.
/// Each file is named after its spectrum file, with ParameterFileExtension appended.
/// </summary>
public void Write()
{
    foreach (var specFilePath in SpecFilePaths)
    {
        var outputFilePath = Path.Combine(OutputDir, Path.GetFileNameWithoutExtension(specFilePath) + ParameterFileExtension);

        using (var writer = new StreamWriter(outputFilePath))
        {
            writer.WriteLine("SpecFile\t" + Path.GetFileName(specFilePath));
            writer.WriteLine("DatabaseFile\t" + Path.GetFileName(DatabaseFilePath));

            // An empty or whitespace feature path counts as "not specified", the same as null:
            // report the .ms1ft name derived from the spectrum file instead of a blank entry.
            var featureFileName = !string.IsNullOrWhiteSpace(FeatureFilePath)
                ? Path.GetFileName(FeatureFilePath)
                : Path.GetFileName(MassSpecDataReaderFactory.ChangeExtension(specFilePath, ".ms1ft"));
            writer.WriteLine("FeatureFile\t{0}", featureFileName);

            writer.WriteLine("SearchMode\t" + SearchModeInt);
            writer.WriteLine("Tag-based search\t" + TagBasedSearch);

            // null -> "Decoy", true -> "Target+Decoy", false -> "Target"
            writer.WriteLine("Tda\t" + (TdaBool == null ? "Decoy" : (bool)TdaBool ? "Target+Decoy" : "Target"));

            writer.WriteLine("PrecursorIonTolerancePpm\t" + PrecursorIonTolerancePpm);
            writer.WriteLine("ProductIonTolerancePpm\t" + ProductIonTolerancePpm);
            writer.WriteLine("MinSequenceLength\t" + MinSequenceLength);
            writer.WriteLine("MaxSequenceLength\t" + MaxSequenceLength);
            writer.WriteLine("MinPrecursorIonCharge\t" + MinPrecursorIonCharge);
            writer.WriteLine("MaxPrecursorIonCharge\t" + MaxPrecursorIonCharge);
            writer.WriteLine("MinProductIonCharge\t" + MinProductIonCharge);
            writer.WriteLine("MaxProductIonCharge\t" + MaxProductIonCharge);
            writer.WriteLine("MinSequenceMass\t" + MinSequenceMass);
            writer.WriteLine("MaxSequenceMass\t" + MaxSequenceMass);
            writer.WriteLine("MaxDynamicModificationsPerSequence\t" + _maxNumDynModsPerSequence);

            foreach (var searchMod in _searchModifications)
            {
                writer.WriteLine("Modification\t" + searchMod);
            }
        }
    }
}
/// <summary>
/// Runs the complete search: reads the spectrum file, builds the precursor filter (selected scans,
/// ProMex .ms1ft, ICR2LS/Decon2LS .csv or MS-Align+ .msalign), deconvolutes the MS/MS spectra,
/// optionally generates sequence tags, searches the target and/or decoy database as selected by
/// RunTargetDecoyAnalysis, and writes the result files (plus the combined FDR file when both are searched).
/// </summary>
/// <param name="corrThreshold">Not referenced in this method body</param>
/// <param name="cancellationToken">Token applied to the parallel deconvolution step; null means no cancellation</param>
/// <param name="progress">Optional progress sink; status and percent complete are forwarded to it</param>
/// <returns>False when the FDR calculation reports an error (ErrorMessage is set); true otherwise</returns>
public bool RunSearch(double corrThreshold = 0.7, CancellationToken? cancellationToken = null, IProgress<ProgressData> progress = null)
{
    // Get the Normalized spec file/folder path
    SpecFilePath = MassSpecDataReaderFactory.NormalizeDatasetPath(SpecFilePath);

    var prog = new Progress<ProgressData>();
    var progData = new ProgressData(progress);
    if (progress != null)
    {
        // Relay progress from the sub-steps through progData, which maps it to the current step range
        prog = new Progress<ProgressData>(p =>
        {
            progData.Status = p.Status;
            progData.StatusInternal = p.StatusInternal;
            progData.Report(p.Percent);
        });
    }

    var sw = new Stopwatch();
    var swAll = new Stopwatch();
    swAll.Start();
    ErrorMessage = string.Empty;

    Console.Write(@"Reading raw file...");
    progData.Status = "Reading spectra file";
    progData.StepRange(10.0);
    sw.Start();

    _run = PbfLcMsRun.GetLcMsRun(SpecFilePath, 0, 0, prog);

    _ms2ScanNums = _run.GetScanNumbers(2).ToArray();

    // Isolation window target m/z per MS2 scan, indexed by scan number
    _isolationWindowTargetMz = new double[_run.MaxLcScan + 1];
    foreach (var ms2Scan in _ms2ScanNums)
    {
        var ms2Spec = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
        if (ms2Spec == null)
        {
            continue;
        }

        _isolationWindowTargetMz[ms2Scan] = ms2Spec.IsolationWindow.IsolationWindowTargetMz;
    }

    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

    progData.StepRange(20.0);
    ISequenceFilter ms1Filter;
    if (this.ScanNumbers != null && this.ScanNumbers.Any())
    {
        ms1Filter = new SelectedMsMsFilter(this.ScanNumbers);
    }
    else if (string.IsNullOrWhiteSpace(FeatureFilePath))
    {
        // Checks whether SpecFileName.ms1ft exists
        var ms1FtFilePath = MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, LcMsFeatureFinderLauncher.FileExtension);
        if (!File.Exists(ms1FtFilePath))
        {
            Console.WriteLine(@"Running ProMex...");
            sw.Start();
            var param = new LcMsFeatureFinderInputParameter
            {
                InputPath = SpecFilePath,
                MinSearchMass = MinSequenceMass,
                MaxSearchMass = MaxSequenceMass,
                MinSearchCharge = MinPrecursorIonCharge,
                MaxSearchCharge = MaxPrecursorIonCharge,
                CsvOutput = false,
                ScoreReport = false,
                LikelihoodScoreThreshold = -10
            };
            var featureFinder = new LcMsFeatureFinderLauncher(param);
            featureFinder.Run();
        }

        sw.Reset();
        sw.Start();
        Console.Write(@"Reading ProMex results...");
        ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, ms1FtFilePath, -10);
    }
    else
    {
        sw.Reset();
        sw.Start();

        // Compare the extension ordinally and case-insensitively: lower-casing with the current
        // culture can fail to match (e.g. ".MSALIGN" under a Turkish culture).
        var extension = Path.GetExtension(FeatureFilePath);
        if (string.Equals(extension, ".csv", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write(@"Reading ICR2LS/Decon2LS results...");
            ms1Filter = new IsosFilter(_run, PrecursorIonTolerance, FeatureFilePath);
        }
        else if (string.Equals(extension, ".ms1ft", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write(@"Reading ProMex results...");
            ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, FeatureFilePath, -10);
        }
        else if (string.Equals(extension, ".msalign", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write(@"Reading MS-Align+ results...");
            ms1Filter = new MsDeconvFilter(_run, PrecursorIonTolerance, FeatureFilePath);
        }
        else
        {
            // Unrecognized feature file type: the search proceeds without a precursor filter
            ms1Filter = null; //new Ms1FeatureMatrix(_run);
        }
    }

    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

    // pre-generate deconvoluted spectra for scoring
    _massBinComparer = new FilteredProteinMassBinning(AminoAcidSet, MaxSequenceMass + 1000);

    _ms2ScorerFactory2 = new CompositeScorerFactory(_run, _massBinComparer, AminoAcidSet,
        MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);

    sw.Reset();
    Console.WriteLine(@"Generating deconvoluted spectra for MS/MS spectra...");
    sw.Start();
    var pfeOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = MaxNumThreads,
        CancellationToken = cancellationToken ?? CancellationToken.None
    };
    Parallel.ForEach(_ms2ScanNums, pfeOptions, ms2ScanNum =>
    {
        _ms2ScorerFactory2.DeconvonluteProductSpectrum(ms2ScanNum);
    });
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

    progData.StepRange(10.0);
    progData.Status = "Reading Fasta File";

    // Target database
    var targetDb = new FastaDatabase(DatabaseFilePath);
    targetDb.Read();

    // Generate sequence tags for all MS/MS spectra
    if (TagBasedSearch)
    {
        progData.StepRange(25.0);
        progData.Status = "Generating Sequence Tags";
        sw.Reset();
        Console.WriteLine(@"Generating sequence tags for MS/MS spectra...");
        sw.Start();
        var seqTagGen = GetSequenceTagGenerator();
        _tagMs2ScanNum = seqTagGen.GetMs2ScanNumsContainingTags().ToArray();
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        _tagSearchEngine = new ScanBasedTagSearchEngine(_run, seqTagGen, new LcMsPeakMatrix(_run, ms1Filter), targetDb, ProductIonTolerance, AminoAcidSet,
            _ms2ScorerFactory2,
            ScanBasedTagSearchEngine.DefaultMinMatchedTagLength,
            MaxSequenceMass, MinProductIonCharge, MaxProductIonCharge);
    }

    var specFileName = MassSpecDataReaderFactory.RemoveExtension(Path.GetFileName(SpecFilePath));
    var targetOutputFilePath = Path.Combine(OutputDir, specFileName + TargetFileNameEnding);
    var decoyOutputFilePath = Path.Combine(OutputDir, specFileName + DecoyFileNameEnding);
    var tdaOutputFilePath = Path.Combine(OutputDir, specFileName + TdaFileNameEnding);

    progData.StepRange(60.0);
    progData.Status = "Running Target search";

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
    {
        sw.Reset();
        Console.Write(@"Reading the target database...");
        sw.Start();
        targetDb.Read();
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // One match set per scan number
        var targetMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];

        progData.MaxPercentage = 42.5;
        if (TagBasedSearch)
        {
            sw.Reset();
            Console.WriteLine(@"Tag-based searching the target database");
            sw.Start();

            // NOTE(review): null is passed here (and in the calls below) rather than
            // cancellationToken - confirm whether the caller's token should be forwarded.
            RunTagBasedSearch(targetMatches, targetDb, null, prog);
            Console.WriteLine(@"Target database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        }

        progData.MaxPercentage = 60.0;

        sw.Reset();
        Console.WriteLine(@"Searching the target database");
        sw.Start();
        RunSearch(targetMatches, targetDb, ms1Filter, null, prog);
        Console.WriteLine(@"Target database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // calculate spectral e-value using generating function
        sw.Reset();
        Console.WriteLine(@"Calculating spectral E-values for target-spectrum matches");
        sw.Start();
        var bestTargetMatches = RunGeneratingFunction(targetMatches);
        WriteResultsToFile(bestTargetMatches, targetOutputFilePath, targetDb);
        sw.Stop();
        Console.WriteLine(@"Target-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    }

    progData.StepRange(95.0); // total to 95%
    progData.Status = "Running Decoy search";

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
    {
        // Decoy database
        sw.Reset();
        sw.Start();
        var decoyDb = targetDb.Decoy(null, true);

        Console.Write(@"Reading the decoy database...");
        decoyDb.Read();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        progData.MaxPercentage = 77.5;

        // One match set per scan number
        var decoyMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];
        if (TagBasedSearch)
        {
            sw.Reset();
            Console.WriteLine(@"Tag-based searching the decoy database");
            sw.Start();
            RunTagBasedSearch(decoyMatches, decoyDb, null, prog);
            Console.WriteLine(@"Decoy database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        }

        progData.MaxPercentage = 95.0;

        sw.Reset();
        Console.WriteLine(@"Searching the decoy database");
        sw.Start();
        RunSearch(decoyMatches, decoyDb, ms1Filter, null, prog);
        Console.WriteLine(@"Decoy database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // calculate spectral e-value using generating function
        sw.Reset();
        Console.WriteLine(@"Calculating spectral E-values for decoy-spectrum matches");
        sw.Start();
        var bestDecoyMatches = RunGeneratingFunction(decoyMatches);
        WriteResultsToFile(bestDecoyMatches, decoyOutputFilePath, decoyDb);
        sw.Stop();
        Console.WriteLine(@"Decoy-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    }

    progData.StepRange(100.0);
    progData.Status = "Writing combined results file";
    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
    {
        // Add "Qvalue" and "PepQValue"
        var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
        if (fdrCalculator.HasError())
        {
            ErrorMessage = fdrCalculator.ErrorMessage;
            Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
            return false;
        }

        fdrCalculator.WriteTo(tdaOutputFilePath);
    }

    progData.Report(100.0);

    Console.WriteLine(@"Done.");
    swAll.Stop();
    Console.WriteLine(@"Total elapsed time for search: {0:f1} sec ({1:f2} min)", swAll.Elapsed.TotalSeconds, swAll.Elapsed.TotalMinutes);

    return true;
}