/// <summary>
/// Writes the identified matches to a tab-delimited text file, one row per MS2 scan.
/// </summary>
/// <remarks>
/// A header row is written first. Scans are visited in the order of <c>_ms2ScanNums</c>;
/// a scan whose entry in <paramref name="matches"/> is null produces no row.
/// </remarks>
/// <param name="matches">Matches indexed by scan number; null entries are skipped.</param>
/// <param name="outputFilePath">Path handed to the <see cref="StreamWriter"/> that receives the rows.</param>
/// <param name="database">Database queried for protein name, description, length and position of each match offset.</param>
private void WriteResultsToFile(DatabaseSequenceSpectrumMatch[] matches, string outputFilePath, FastaDatabase database)
{
    // One placeholder per output column, in header order.
    const string rowFormat = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}\t{16}\t{17}";

    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("Scan\tPre\tSequence\tPost\tModifications\tComposition\tProteinName\tProteinDesc" +
                         "\tProteinLength\tStart\tEnd\tCharge\tMostAbundantIsotopeMz\tMass\t#MatchedFragments\tProbability\tSpecEValue\tEValue");

        foreach (var scanNum in _ms2ScanNums)
        {
            var match = matches[scanNum];
            if (match != null)
            {
                var peptide = match.Sequence;
                var dbOffset = match.Offset;

                // Residue range of the match within its protein.
                var firstResidue = database.GetOneBasedPositionInProtein(dbOffset) + 1 + match.NumNTermCleavages;
                var lastResidue = firstResidue + peptide.Length - 1;

                var proteinName = database.GetProteinName(dbOffset);
                var proteinLength = database.GetProteinLength(proteinName);
                var precursorIon = match.Ion;
                var proteinDescription = database.GetProteinDescription(dbOffset);
                var probability = CompositeScorer.GetProbability(match.Score);

                // Note on DblToString(value, 9, true): with "9" and "true",
                // values between 100 and 999 Da get 7 digits after the decimal point, and
                // values between 1000 and 9999 get 6 digits after the decimal point.
                var columns = new object[]
                {
                    scanNum,                                                                        // Scan
                    match.Pre,                                                                      // Pre
                    peptide,                                                                        // Sequence
                    match.Post,                                                                     // Post
                    match.ModificationText,                                                         // Modifications
                    precursorIon.Composition,                                                       // Composition
                    proteinName,                                                                    // ProteinName
                    proteinDescription,                                                             // ProteinDesc
                    proteinLength,                                                                  // ProteinLength
                    firstResidue,                                                                   // Start
                    lastResidue,                                                                    // End
                    precursorIon.Charge,                                                            // Charge
                    StringUtilities.DblToString(precursorIon.GetMostAbundantIsotopeMz(), 9, true),  // MostAbundantIsotopeMz
                    StringUtilities.DblToString(precursorIon.Composition.Mass, 9, true),            // Mass
                    match.NumMatchedFragments,                                                      // #MatchedFragments
                    StringUtilities.DblToString(probability, 4),                                    // Probability

                    // SpecEValue; shown in scientific notation when the value is less than 0.001
                    StringUtilities.DblToString(ExcelMinValue(match.SpecEvalue), 6, true, 0.001),

                    // EValue (SpecEValue times the number of database entries);
                    // shown in scientific notation when the value is less than 0.001
                    StringUtilities.DblToString(ExcelMinValue(match.SpecEvalue * database.GetNumEntries()), 6, true, 0.001)
                };

                writer.WriteLine(rowFormat, columns);
            }
        }
    }
}
/// <summary>
/// Looks up every sufficiently long sequence tag from a .seqtag file in a FASTA database,
/// collects the scans that hit each protein, and prints summary counts to the console.
/// </summary>
/// <remarks>
/// Both input paths are hard-coded. When either file is missing the test is skipped
/// through <c>Assert.Ignore</c> instead of failing.
/// </remarks>
public void CountMatchedScansPerProtein()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Tags shorter than this are not searched.
    const int minTagLength = 6;

    const string fastaFilePath = @"D:\Research\Data\CommonContaminants\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";

    // Alternative tag files used previously:
    //   H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3_seqtag.tsv
    //   \\protoapps\UserData\Jungkap\Co_culture\23B_pellet_TD_3Feb14_Bane_PL011402.seqtag
    const string tagFilePath = @"D:\MassSpecFiles\co_culture\23A_pellet_TD_3Feb14_Bane_PL011402.seqtag";

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    Console.WriteLine(@"Sequence length: {0}", fastaDb.GetSequence().Length);

    if (!File.Exists(tagFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFilePath);
    }

    // Protein name -> distinct scan numbers with at least one tag found in that protein.
    var scansByProtein = new Dictionary<string, HashSet<int>>();
    var numMatchedPairs = 0;

    // The first line of the tag file is a header and carries no data.
    foreach (var line in File.ReadAllLines(tagFilePath).Skip(1))
    {
        var fields = line.Split('\t');
        if (fields.Length != 3)
        {
            continue;
        }

        // The scan number is parsed before the tag-length filter, so a malformed
        // scan column throws even on rows whose tag would be rejected.
        var scan = Convert.ToInt32(fields[0]);
        var tag = fields[1];
        if (tag.Length < minTagLength)
        {
            continue;
        }

        foreach (var index in searchableDb.FindAllMatchedSequenceIndices(tag))
        {
            var matchedProtein = fastaDb.GetProteinName(index);
            ++numMatchedPairs;

            HashSet<int> scans;
            if (!scansByProtein.TryGetValue(matchedProtein, out scans))
            {
                scans = new HashSet<int>();
                scansByProtein.Add(matchedProtein, scans);
            }

            scans.Add(scan);
        }
    }

    var numMatchedProteins = scansByProtein.Count;
    var numAllProteins = fastaDb.GetNumEntries();

    Console.WriteLine("NumAllProteins: {0}", numAllProteins);
    Console.WriteLine("NumMatchedProteins: {0}", numMatchedProteins);

    // NOTE(review): this is the number of tag/protein hits (repeated scans included)
    // divided by ALL database entries, not distinct scans per matched protein.
    // Confirm this is the intended statistic.
    Console.WriteLine("AvgMatchedScansPerProtein: {0}", numMatchedPairs / (float)numAllProteins);
}
/// <summary>
/// Loads a FASTA database from a hard-coded network path and prints its number of entries.
/// </summary>
/// <remarks>
/// The test is skipped through <c>Assert.Ignore</c> when the file cannot be found,
/// the same guard <c>CountMatchedScansPerProtein</c> applies to its input files.
/// Without it, the database is constructed against a missing file whenever the share is unreachable.
/// </remarks>
public void TestFasta()
{
    var methodName = MethodBase.GetCurrentMethod().Name;

    const string fastaFilePath = @"\\protoapps\UserData\Jungkap\Lewy\db\ID_005140_7A170668.fasta";
    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var db = new FastaDatabase(fastaFilePath);
    Console.WriteLine(db.GetNumEntries());
}