// Verifies two things against files on a fixed local path:
//   1. every protein name in the iPRG2015 target/decoy FASTA file contains a substring
//      matching the uniprotAccession pattern, and
//   2. every non-empty line of ProteinNames.txt maps back to one of those FASTA protein names.
// The test is ignored (not failed) when either input file is missing.
public void GetProteinAccessions()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Unanchored pattern: IsMatch succeeds if the accession appears anywhere in the name.
    const string uniprotAccession = "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}";
    var uniProtPattern = new Regex(uniprotAccession);

    const string databaseFilePath = @"H:\Research\IPRG2015\Henry_results\iPRG2015.TargDecoy.fasta";
    if (!File.Exists(databaseFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, databaseFilePath);
    }

    var database = new FastaDatabase(databaseFilePath);
    database.Read();

    // Maps the short name (text after the last '|', plus "-DECOY" for decoy entries)
    // to the full protein name as stored in the FASTA database.
    var nameToAccession = new Dictionary<string, string>();
    foreach (var proteinName in database.GetProteinNames())
    {
        // LastIndexOf returns -1 when there is no '|', in which case the whole name is used.
        var lastBar = proteinName.LastIndexOf('|');
        var name = proteinName.Substring(lastBar + 1);

        // Ordinal comparison: "DECOY" is an identifier prefix, not linguistic text.
        if (proteinName.StartsWith("DECOY", StringComparison.Ordinal))
        {
            name = name + "-DECOY";
        }

        Assert.IsTrue(
            uniProtPattern.IsMatch(proteinName),
            "Protein name does not contain a UniProt accession: {0}",
            proteinName);

        // Add (rather than the indexer) is intentional: a duplicate short name throws,
        // which surfaces ambiguous mappings instead of silently overwriting them.
        nameToAccession.Add(name, proteinName);
    }

    const string resultPath = @"H:\Research\IPRG2015\Henry_results\ProteinNames.txt";
    if (!File.Exists(resultPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultPath);
    }

    foreach (var line in File.ReadLines(resultPath))
    {
        if (line.Length == 0)
        {
            continue;
        }

        // Only the first whitespace-delimited token of the line is considered.
        var name = line.Split()[0];
        if (name.Contains('|'))
        {
            name = name.Substring(name.LastIndexOf('|') + 1);
        }

        string proteinName;
        if (nameToAccession.TryGetValue(name, out proteinName))
        {
            Console.WriteLine(proteinName);
        }
        else
        {
            // Name the offending entry in the failure message instead of a bare "expected true".
            Assert.Fail("No protein in {0} matches result entry: {1}", databaseFilePath, name);
        }
    }
}