Exemple #1
0
        // Checks that every protein name in the FASTA database contains a match for the
        // accession pattern below, then looks up each entry of ProteinNames.txt among the
        // names read from that database, printing the matching full protein name.
        // The test is ignored (not failed) when either hard-coded input file is missing.
        public void GetProteinAccessions()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // The pattern is not anchored, so IsMatch succeeds if it occurs anywhere in the name.
            const string uniprotAccession = "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}";
            var uniProtPattern = new Regex(uniprotAccession);
            const string databaseFilePath = @"H:\Research\IPRG2015\Henry_results\iPRG2015.TargDecoy.fasta";
            if (!File.Exists(databaseFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, databaseFilePath);
            }

            var database = new FastaDatabase(databaseFilePath);
            database.Read();

            // Key: text after the last '|' of the protein name (with "-DECOY" appended for
            // names starting with "DECOY"); value: the full protein name.
            var nameToAccession = new Dictionary<string, string>();
            foreach (var proteinName in database.GetProteinNames())
            {
                // LastIndexOf returns -1 when there is no '|', so the whole name is kept in that case.
                var name = proteinName.Substring(proteinName.LastIndexOf('|') + 1);

                // Ordinal comparison: "DECOY" is an identifier prefix, not linguistic text.
                if (proteinName.StartsWith("DECOY", StringComparison.Ordinal))
                {
                    name += "-DECOY";
                }

                Assert.IsTrue(
                    uniProtPattern.IsMatch(proteinName),
                    @"Protein name does not contain an accession match: {0}",
                    proteinName);

                // Dictionary.Add throws on a duplicate key, so two proteins mapping to the
                // same short name abort the test here.
                nameToAccession.Add(name, proteinName);
            }

            const string resultPath = @"H:\Research\IPRG2015\Henry_results\ProteinNames.txt";
            if (!File.Exists(resultPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultPath);
            }

            foreach (var line in File.ReadLines(resultPath))
            {
                if (line.Length == 0)
                {
                    continue;
                }

                // Keep only the first whitespace-delimited token, then the part after its
                // last '|' (the whole token when it contains no '|').
                var name = line.Split()[0];
                name = name.Substring(name.LastIndexOf('|') + 1);

                string proteinName;
                if (nameToAccession.TryGetValue(name, out proteinName))
                {
                    Console.WriteLine(proteinName);
                }
                else
                {
                    Console.WriteLine(name);
                    Assert.Fail(@"Name from {0} not found in database {1}: {2}", resultPath, databaseFilePath, name);
                }
            }
        }