/// <summary>
/// Writes the search results to a tab-delimited text file: a header line followed by one row per match.
/// </summary>
/// <remarks>
/// Scans from <c>_run.MinLcScan</c> through <c>_run.MaxLcScan</c> are visited in order, and scans whose entry in
/// <paramref name="matches"/> is null are skipped. The matches of a scan are written in the reverse of the set's order.
/// Each match is re-scored with <c>_bottomUpScorer</c>. A match that has no ion, or for which no scores are returned,
/// is reported on the console and left out of the file.
/// </remarks>
/// <param name="matches">Match sets indexed by scan number; an entry may be null.</param>
/// <param name="outputFilePath">Path of the text file to write.</param>
/// <param name="database">Database used to look up the protein name, description, length and position of each match.</param>
private void WriteResultsToFile(SortedSet<DatabaseSequenceSpectrumMatch>[] matches, string outputFilePath, FastaDatabase database)
{
    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("Scan\tPre\tSequence\tPost\tModifications\tComposition\tProteinName\tProteinDesc" +
                         "\tProteinLength\tStart\tEnd\tCharge\tMostAbundantIsotopeMz\tMass\t#MatchedFragments\tIcScore"
        );

        for (var scanNum = _run.MinLcScan; scanNum <= _run.MaxLcScan; scanNum++)
        {
            var scanMatches = matches[scanNum];
            if (scanMatches == null)
            {
                continue;
            }

            foreach (var match in scanMatches.Reverse())
            {
                // The ion must be validated before it is used to compute the scores;
                // dereferencing it first would throw before the diagnostic could be printed.
                var ion = match.Ion;
                if (ion == null)
                {
                    Console.WriteLine(@"Null ion!");
                    continue;
                }

                var scores = _bottomUpScorer.GetScores(match, ion.Composition, ion.Charge, scanNum);
                if (scores == null)
                {
                    // Without scores there is nothing to put in the Modifications and IcScore columns
                    Console.WriteLine(@"Null scores");
                    continue;
                }

                var sequence = match.Sequence;
                var offset = match.Offset;
                var start = database.GetOneBasedPositionInProtein(offset) + 1 + match.NumNTermCleavages;
                var end = start + sequence.Length - 1;
                var proteinName = database.GetProteinName(offset);
                var protLength = database.GetProteinLength(proteinName);
                var proteinDescription = database.GetProteinDescription(offset);

                // Note for DblToString(value, 9, true), by having "9" and "true",
                // values between 100 and 999 Da will have 7 digits after the decimal place, and
                // values between 1000 and 9999 will have 6 digits after the decimal place
                writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}",
                    scanNum,                // Scan
                    match.Pre,              // Pre
                    sequence,               // Sequence
                    match.Post,             // Post
                    scores.Modifications,   // Modifications
                    ion.Composition,        // Composition
                    proteinName,            // ProteinName
                    proteinDescription,     // ProteinDesc
                    protLength,             // ProteinLength
                    start,                  // Start
                    end,                    // End
                    ion.Charge,             // Charge
                    StringUtilities.DblToString(ion.GetMostAbundantIsotopeMz(), 9, true),   // MostAbundantIsotopeMz
                    StringUtilities.DblToString(ion.Composition.Mass, 9, true),             // Mass
                    match.Score,            // #MatchedFragments
                    scores.Score            // IcScore (re-scored)
                );
            }
        }
    }
}
/// <summary>
/// Reads the Short.fasta test database, enumerates its peptide annotations, and prints one tab-separated
/// line per annotation: annotation, offset, protein name, protein length, and position in the protein.
/// The test is ignored when the FASTA file cannot be found.
/// </summary>
public void TestGettingProteinLengthAndPosition()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";
    if (!File.Exists(dbFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
    }

    var fastaDb = new FastaDatabase(dbFile);
    fastaDb.Read();

    var peptideSource = new IndexedDatabase(fastaDb);
    foreach (var hit in peptideSource.AnnotationsAndOffsets(6, 20, 2, 0, Enzyme.Trypsin))
    {
        var annotation = hit.Annotation;
        var offset = hit.Offset;

        var proteinName = fastaDb.GetProteinName(offset);
        // The length lookup is keyed by protein name, which is resolved from the offset
        var proteinLength = fastaDb.GetProteinLength(fastaDb.GetProteinName(offset));
        var position = fastaDb.GetOneBasedPositionInProtein(offset) + 1;

        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", annotation, offset, proteinName, proteinLength, position);
    }
}
/// <summary>
/// Reads the Short.fasta test database (located through <c>Utils.GetTestFile</c>), enumerates its peptide
/// annotations, and prints one tab-separated line per annotation: annotation, offset, protein name,
/// protein length, and position in the protein.
/// </summary>
public void TestGettingProteinLengthAndPosition()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var relativeFastaPath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta");
    var fastaFile = Utils.GetTestFile(methodName, relativeFastaPath);

    var fastaDb = new FastaDatabase(fastaFile.FullName);
    fastaDb.Read();

    var peptideSource = new IndexedDatabase(fastaDb);
    foreach (var hit in peptideSource.AnnotationsAndOffsets(6, 20, 2, 0, Enzyme.Trypsin))
    {
        var annotation = hit.Annotation;
        var offset = hit.Offset;

        var proteinName = fastaDb.GetProteinName(offset);
        // The length lookup is keyed by protein name, which is resolved from the offset
        var proteinLength = fastaDb.GetProteinLength(fastaDb.GetProteinName(offset));
        var position = fastaDb.GetOneBasedPositionInProtein(offset) + 1;

        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", annotation, offset, proteinName, proteinLength, position);
    }
}
/// <summary>
/// Writes the best match of each MS/MS scan to a tab-delimited text file: a header line followed by one row per scan.
/// </summary>
/// <remarks>
/// Scan numbers are taken from <c>_ms2ScanNums</c>; scans whose entry in <paramref name="matches"/> is null are skipped.
/// The last two columns are the spectral E-value and the E-value, the latter being the spectral E-value
/// multiplied by the number of database entries.
/// </remarks>
/// <param name="matches">Matches indexed by scan number; an entry may be null.</param>
/// <param name="outputFilePath">Path of the text file to write.</param>
/// <param name="database">Database used to look up the protein name, description, length and position of each match.</param>
private void WriteResultsToFile(DatabaseSequenceSpectrumMatch[] matches, string outputFilePath, FastaDatabase database)
{
    const string header = "Scan\tPre\tSequence\tPost\tModifications\tComposition\tProteinName\tProteinDesc" +
                          "\tProteinLength\tStart\tEnd\tCharge\tMostAbundantIsotopeMz\tMass\t#MatchedFragments\tProbability\tSpecEValue\tEValue";

    const string rowFormat = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}\t{16}\t{17}";

    using (var resultWriter = new StreamWriter(outputFilePath))
    {
        resultWriter.WriteLine(header);

        foreach (var scanNum in _ms2ScanNums)
        {
            var hit = matches[scanNum];
            if (hit == null)
            {
                continue;
            }

            var peptide = hit.Sequence;
            var dbOffset = hit.Offset;

            // Position of the matched sequence within its protein
            var firstResidue = database.GetOneBasedPositionInProtein(dbOffset) + 1 + hit.NumNTermCleavages;
            var lastResidue = firstResidue + peptide.Length - 1;

            var proteinName = database.GetProteinName(dbOffset);
            var proteinLength = database.GetProteinLength(proteinName);
            var precursorIon = hit.Ion;
            var proteinDescription = database.GetProteinDescription(dbOffset);
            var probability = CompositeScorer.GetProbability(hit.Score);

            // Note for DblToString(value, 9, true), by having "9" and "true",
            // values between 100 and 999 Da will have 7 digits after the decimal place, and
            // values between 1000 and 9999 will have 6 digits after the decimal place
            var isotopeMzText = StringUtilities.DblToString(precursorIon.GetMostAbundantIsotopeMz(), 9, true);
            var massText = StringUtilities.DblToString(precursorIon.Composition.Mass, 9, true);
            var probabilityText = StringUtilities.DblToString(probability, 4);

            // Both E-values will be displayed using scientific notation if the value is less than 0.001
            var specEValueText = StringUtilities.DblToString(ExcelMinValue(hit.SpecEvalue), 6, true, 0.001);
            var eValueText = StringUtilities.DblToString(ExcelMinValue(hit.SpecEvalue * database.GetNumEntries()), 6, true, 0.001);

            resultWriter.WriteLine(rowFormat,
                scanNum,                    // Scan
                hit.Pre,                    // Pre
                peptide,                    // Sequence
                hit.Post,                   // Post
                hit.ModificationText,       // Modifications
                precursorIon.Composition,   // Composition
                proteinName,                // ProteinName
                proteinDescription,         // ProteinDesc
                proteinLength,              // ProteinLength
                firstResidue,               // Start position in protein
                lastResidue,                // End position in protein
                precursorIon.Charge,        // Charge (precursor charge)
                isotopeMzText,              // MostAbundantIsotopeMz
                massText,                   // Mass
                hit.NumMatchedFragments,    // #MatchedFragments
                probabilityText,            // Probability
                specEValueText,             // SpecEValue
                eValueText                  // EValue
            );
        }
    }
}
/// <summary>
/// Searches the Short.fasta test database for a fixed peptide pattern, asserts that it is found, and prints
/// the match position, all matched sequence indices, the corresponding positions in the proteins,
/// and the elapsed time. The test is ignored when the FASTA file cannot be found.
/// </summary>
public void TestSearching()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    var timer = System.Diagnostics.Stopwatch.StartNew();

    const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";
    if (!File.Exists(dbFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
    }

    var fastaDb = new FastaDatabase(dbFile);
    var searchableDb = new SearchableDatabase(fastaDb);

    // Alternative search pattern: NSGSHFCGGSLINSQWVVSAAH
    const string pattern = "FPTDDDDK";

    var position = searchableDb.Search(pattern);
    Assert.True(position >= 0);
    Console.WriteLine("Position: {0}", position);

    var matchedIndexText = string.Join(",", searchableDb.FindAllMatchedSequenceIndices(pattern));
    Console.WriteLine("Matched indices: {0}", matchedIndexText);

    // Map every matched index to its position within the protein that contains it
    var proteinPositions = searchableDb.FindAllMatchedSequenceIndices(pattern)
        .Select(index => fastaDb.GetOneBasedPositionInProtein(index));
    Console.WriteLine("Protein indices: {0}", string.Join(",", proteinPositions));

    timer.Stop();
    Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Finds sequence tags in a single spectrum, looks every tag up in a FASTA database, groups the hits by protein,
/// and prints, for each protein (most tag matches first), the matched tags with their flanking masses and the
/// differences between those masses and the masses computed from the protein sequence.
/// </summary>
/// <remarks>
/// The test is ignored when either input file cannot be found.
/// NOTE(review): rawFilePath is an empty string and File.Exists returns false for an empty path, so this test
/// currently always ends in Assert.Ignore. Presumably it should point at the Lewy_intact_07 dataset
/// (H:\Research\Lewy\raw\Lewy_intact_07) - confirm and set the path.
/// </remarks>
public void TestGetProteinsWithTagMatchingSingleSpec()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const int minNumTagMatches = 1;
    const int scanNum = 2;

    var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();

    const string rawFilePath = "";
    const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

    // The "as" cast yields null when the scan is missing or is not a ProductSpectrum;
    // fail with a clear message here instead of passing null on to the tag finder.
    var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
    Assert.NotNull(spec, @"Scan {0} did not yield a ProductSpectrum", scanNum);

    var tagFinder = new SequenceTagFinder(spec, new Tolerance(5));
    var tags = tagFinder.GetAllSequenceTagString();

    // Group the database hits of every tag by the name of the protein they fall in
    var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();
    foreach (var tag in tags)
    {
        var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
        foreach (var index in matchedIndices)
        {
            var protein = fastaDb.GetProteinName(index);
            var startIndex = fastaDb.GetOneBasedPositionInProtein(index);
            var matchedTag = new MatchedTag(tag, startIndex, 0.0);

            IList<MatchedTag> existingTags;
            if (proteinsToTags.TryGetValue(protein, out existingTags))
            {
                existingTags.Add(matchedTag);
            }
            else
            {
                proteinsToTags.Add(protein, new List<MatchedTag> { matchedTag });
            }
        }
    }

    foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
    {
        // Entries are sorted by descending tag count, so every remaining protein is below the threshold too
        if (entry.Value.Count < minNumTagMatches)
        {
            break;
        }

        var proteinName = entry.Key;
        var proteinSequence = fastaDb.GetProteinSequence(proteinName);
        var protein = new Sequence(proteinSequence, aminoAcidSet);
        Console.WriteLine(proteinName + "\t" + entry.Value.Count);

        foreach (var matchedTag in entry.Value)
        {
            var seq = proteinSequence.Substring(matchedTag.StartIndex, matchedTag.EndIndex - matchedTag.StartIndex);

            // Masses of the protein regions on either side of the tag, for comparison with the tag's flanking masses
            var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
            var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

            Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass),
                seq,
                matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass),
                matchedTag.StartIndex,
                matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
        }
    }
}
/// <summary>
/// Searches the Short.fasta test database (located through <c>Utils.GetTestFile</c>) for a fixed peptide pattern,
/// asserts that it is found, and prints the match position, all matched sequence indices, the corresponding
/// positions in the proteins, and the elapsed time.
/// </summary>
public void TestSearching()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var timer = System.Diagnostics.Stopwatch.StartNew();

    var relativeFastaPath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta");
    var fastaFile = Utils.GetTestFile(methodName, relativeFastaPath);

    var fastaDb = new FastaDatabase(fastaFile.FullName);
    var searchableDb = new SearchableDatabase(fastaDb);

    // Alternative search pattern: NSGSHFCGGSLINSQWVVSAAH
    const string pattern = "FPTDDDDK";

    var position = searchableDb.Search(pattern);
    Assert.True(position >= 0);
    Console.WriteLine("Position: {0}", position);

    var matchedIndexText = string.Join(",", searchableDb.FindAllMatchedSequenceIndices(pattern));
    Console.WriteLine("Matched indices: {0}", matchedIndexText);

    // Map every matched index to its position within the protein that contains it
    var proteinPositions = searchableDb.FindAllMatchedSequenceIndices(pattern)
        .Select(index => fastaDb.GetOneBasedPositionInProtein(index));
    Console.WriteLine("Protein indices: {0}", string.Join(",", proteinPositions));

    timer.Stop();
    Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);
}