Пример #1
0
        // Reads the iPRG2015 target/decoy FASTA database, checks that every protein name
        // contains a match for the UniProt accession pattern, and then verifies that each
        // entry listed in ProteinNames.txt maps back to a protein name from that database.
        // The test is ignored (rather than failed) when either hard-coded input file is missing.
        public void GetProteinAccessions()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            // The pattern is not anchored, so a name passes if the pattern matches anywhere inside it.
            const string uniprotAccession = "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}";
            var          uniProtPattern   = new Regex(uniprotAccession);
            const string databaseFilePath = @"H:\Research\IPRG2015\Henry_results\iPRG2015.TargDecoy.fasta";

            if (!File.Exists(databaseFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, databaseFilePath);
            }

            var database = new FastaDatabase(databaseFilePath);
            database.Read();

            // Maps the short name (text after the last '|', suffixed with "-DECOY" for decoy
            // entries) to the full protein name as stored in the database.
            var shortNameToProteinName = new Dictionary<string, string>();

            foreach (var proteinName in database.GetProteinNames())
            {
                // LastIndexOf returns -1 when there is no '|', in which case the whole name is kept.
                var shortName = proteinName.Substring(proteinName.LastIndexOf('|') + 1);

                // Ordinal comparison: "DECOY" is an identifier prefix, not linguistic text.
                if (proteinName.StartsWith("DECOY", StringComparison.Ordinal))
                {
                    shortName += "-DECOY";
                }

                Assert.IsTrue(
                    uniProtPattern.IsMatch(proteinName),
                    "Protein name does not contain a UniProt accession: {0}",
                    proteinName);

                // Add throws on a duplicate key, so two entries with the same short name fail the test.
                shortNameToProteinName.Add(shortName, proteinName);
            }

            const string resultPath = @"H:\Research\IPRG2015\Henry_results\ProteinNames.txt";

            if (!File.Exists(resultPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultPath);
            }

            foreach (var line in File.ReadLines(resultPath))
            {
                if (line.Length == 0)
                {
                    continue;
                }

                // Keep only the first whitespace-delimited token, then drop everything up to
                // and including the last '|' (a no-op when the token contains no '|').
                var name = line.Split()[0];
                name = name.Substring(name.LastIndexOf('|') + 1);

                string proteinName;
                if (!shortNameToProteinName.TryGetValue(name, out proteinName))
                {
                    Console.WriteLine(name);
                    Assert.Fail("Protein name from {0} not found in database {1}: {2}", resultPath, databaseFilePath, name);
                }

                Console.WriteLine(proteinName);
            }
        }