Пример #1
0
        /// <summary>
        /// Verifies that both ParseOne and Parse throw when given a reader over an empty string.
        /// </summary>
        public void TestGenBankFailureWhenParsingEmpty()
        {
            ISequenceParser parser = new GenBankParser();

            // Assert.Fail cannot be called inside the try blocks: the broad catch below would
            // swallow the assertion exception. A flag records whether the call returned normally.

            // test ParseOne
            bool parseOneReturned = false;
            try
            {
                using (StringReader emptyReader = new StringReader(string.Empty))
                {
                    parser.ParseOne(emptyReader);
                }
                parseOneReturned = true;
            }
            catch (Exception)
            {
                // all is well with the world
            }
            if (parseOneReturned)
            {
                Assert.Fail("Failed to throw exception for calling ParseOne on reader containing empty string.");
            }

            // test Parse
            bool parseReturned = false;
            try
            {
                using (StringReader emptyReader = new StringReader(string.Empty))
                {
                    parser.Parse(emptyReader);
                }
                parseReturned = true;
            }
            catch (Exception)
            {
                // all is well with the world
            }
            if (parseReturned)
            {
                Assert.Fail("Failed to throw exception for calling Parse on reader containing empty string.");
            }
        }
Пример #2
0
        /// <summary>
        /// Parses the file named by _singleDnaSeqGenBankFilename with the parser alphabet set to DNA,
        /// writes the first sequence back out through a GenBankFormatter, and compares the written
        /// text with the expected output after stripping spaces from both.
        /// </summary>
        public void TestGenBankWhenUserSetsDnaAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser(_singleDnaSeqGenBankFilename);

            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.Parse().FirstOrDefault();

            // Fail with a clear assertion instead of a NullReferenceException when nothing was parsed.
            Assert.IsNotNull(seq);
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            string actual;

            try
            {
                // format
                ISequenceFormatter formatter = new GenBankFormatter(TempGenBankFileName);
                try
                {
                    // Direct cast: an 'as' cast would turn a type mismatch into a NullReferenceException.
                    ((GenBankFormatter)formatter).Write(seq);
                }
                finally
                {
                    // Close the formatter even when Write throws.
                    formatter.Close();
                }

                using (StreamReader reader = new StreamReader(TempGenBankFileName))
                {
                    actual = reader.ReadToEnd();
                }
            }
            finally
            {
                // Remove the temp file on every path, not only on success.
                File.Delete(TempGenBankFileName);
            }

            // test the formatting
            Assert.AreEqual(_singleDnaSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
        }
Пример #3
0
        /// <summary>
        /// Builds a Sequence from the configured expected text plus the ID and GenBank metadata of the
        /// first sequence parsed from FilePath, writes it through a GenBankFormatter opened on a
        /// StreamWriter, re-parses the written file and validates alphabet, ID, locus/version
        /// metadata and sequence text against the configured values.
        /// </summary>
        public void GenBankFormatterValidateWriteUsingStream()
        {
            InitializeXmlVariables();

            // Create a Sequence with all attributes.
            // Parse and update the properties instead of parsing entire file.
            using (ISequenceParser parser1 = new GenBankParser(FilePath))
            {
                // Fetch the first parsed sequence once; it supplies both the metadata and the ID,
                // so the Parse() result is not enumerated twice.
                ISequence sourceSeq = parser1.Parse().ElementAt(0);
                string expectedUpdatedSequence =
                    ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                Sequence orgSeq =
                    new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
                orgSeq.Metadata.Add("GenBank", (GenBankMetadata)sourceSeq.Metadata["GenBank"]);
                orgSeq.ID = sourceSeq.ID;

                string tempFileName = System.IO.Path.GetTempFileName();
                try
                {
                    GenBankMetadata metadata = null;
                    ISequence seq = null;

                    using (ISequenceFormatter formatter = new GenBankFormatter())
                    {
                        using (StreamWriter writer = new StreamWriter(tempFileName))
                        {
                            formatter.Open(writer);
                            formatter.Write(orgSeq);
                        }
                    }
                    using (GenBankParser parserObj = new GenBankParser(tempFileName))
                    {
                        IEnumerable<ISequence> seqList = parserObj.Parse();
                        seq = seqList.ElementAt(0);
                        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                        Assert.AreEqual(SeqId, seq.ID);
                        ApplicationLog.WriteLine(
                            "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                        // test the metadata that is tricky to parse, and will not be tested implicitly by
                        // testing the formatting
                        metadata = (GenBankMetadata)seq.Metadata["GenBank"];
                        if (metadata.Locus.Strand != SequenceStrandType.None)
                        {
                            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                        }
                    }
                    Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture), metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                    Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                    Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                    Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                    // test the sequence string
                    Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
                    ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
                    Console.WriteLine(string.Format((IFormatProvider)null, "GenBank Formatter BVT: Successfully validated the Sequence '{0}'", ExpectedSequence));
                }
                finally
                {
                    // Delete the temp file even when a write, parse or assertion fails.
                    File.Delete(tempFileName);
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Parses the file named by _singleProteinSeqGenBankFilename with the parser alphabet set to
        /// Alphabets.DNA, formats the first sequence to the temp file and compares the result with
        /// the expected output (spaces stripped, expected line endings normalised to the platform's).
        /// </summary>
        public void TestGenBankWhenUserSetsProteinAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser();

            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

            // Fail with a clear assertion instead of a NullReferenceException when nothing was parsed.
            Assert.IsNotNull(seq);
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string actual;

            try
            {
                using (formatter.Open(TempGenBankFileName))
                {
                    formatter.Format(seq);
                }

                using (StreamReader reader = new StreamReader(TempGenBankFileName))
                {
                    actual = reader.ReadToEnd();
                }
            }
            finally
            {
                // Remove the temp file on every path, not only on success.
                File.Delete(TempGenBankFileName);
            }

            // test the formatting
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine), actual.Replace(" ", ""));
        }
Пример #5
0
        /// <summary>
        /// Smoke test: parses every "*.gbk" file under _genBankDataPath and formats each resulting
        /// sequence to a string, logging progress. Any exception is logged and rethrown.
        /// </summary>
        public void TestGenBankForManyFiles()
        {
            // One parser and one formatter are shared by every input file.
            ISequenceParser    genBankParser    = new GenBankParser();
            ISequenceFormatter genBankFormatter = new GenBankFormatter();

            FileInfo[] gbkFiles = new DirectoryInfo(_genBankDataPath).GetFiles("*.gbk");

            foreach (FileInfo gbkFile in gbkFiles)
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", gbkFile.FullName, Environment.NewLine);

                try
                {
                    foreach (Sequence parsed in genBankParser.Parse(gbkFile.FullName))
                    {
                        // The formatted text is discarded; only the absence of an exception matters.
                        genBankFormatter.FormatString(parsed);
                    }

                    string successMessage = "Parse completed successfully." + Environment.NewLine;
                    ApplicationLog.WriteLine(successMessage);
                }
                catch (Exception e)
                {
                    // Record the failure in the log, then let it propagate so the test fails.
                    string failureMessage = "Parse failed:" + Environment.NewLine + e + Environment.NewLine;
                    ApplicationLog.WriteLine(failureMessage);
                    throw;
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Parses the file named by _singleDnaSeqGenBankFilename with the parser alphabet set to DNA,
        /// formats the first sequence to the temp file and compares the written text with the
        /// expected output after both pass through Utility.CleanupWhiteSpace.
        /// </summary>
        public void TestGenBankWhenUserSetsDnaAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser();

            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.Parse(_singleDnaSeqGenBankFilename).FirstOrDefault();

            // Fail with a clear assertion instead of a NullReferenceException when nothing was parsed.
            Assert.IsNotNull(seq);
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string actual;

            try
            {
                formatter.Format(seq, TempGenBankFileName);

                using (StreamReader reader = new StreamReader(TempGenBankFileName))
                {
                    actual = reader.ReadToEnd();
                }
            }
            finally
            {
                // Remove the temp file on every path, not only on success.
                File.Delete(TempGenBankFileName);
            }

            // test the formatting
            Assert.AreEqual(Utility.CleanupWhiteSpace(_singleDnaSeqGenBankFileExpectedOutput),
                            Utility.CleanupWhiteSpace(actual));
        }
Пример #7
0
        /// <summary>
        /// Parses the file named by InputFile with a GenBankParser, stores the result in
        /// SequenceList and reports the activity as closed.
        /// </summary>
        /// <param name="executionContext">Execution context supplied by the caller; not read here.</param>
        /// <returns>Always ActivityExecutionStatus.Closed.</returns>
        protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
        {
            string sourcePath = InputFile;
            var genBankParser = new GenBankParser();

            SequenceList = genBankParser.Parse(sourcePath);

            return ActivityExecutionStatus.Closed;
        }
Пример #8
0
        /// <summary>
        /// Checks that parsing the file named by _genBankFile_LocusTokenParserTest yields at least
        /// one sequence.
        /// </summary>
        public void TestGenBankLocusTokenParser()
        {
            GenBankParser genBankParser = new GenBankParser();

            // FirstOrDefault returns null when nothing was parsed, which the assertion rejects.
            var parsed = genBankParser.Parse(_genBankFile_LocusTokenParserTest);
            ISequence firstSequence = parsed.FirstOrDefault();

            Assert.IsNotNull(firstSequence);
        }
Пример #9
0
        /// <summary>
        /// Checks that parsing the file named by _genBankFile_ParseVersionEmpty yields at least
        /// one sequence.
        /// </summary>
        public void TestGenBankParseVersionEmpty()
        {
            GenBankParser genBankParser = new GenBankParser();

            // FirstOrDefault returns null when nothing was parsed, which the assertion rejects.
            var parsed = genBankParser.Parse(_genBankFile_ParseVersionEmpty);
            ISequence firstSequence = parsed.FirstOrDefault();

            Assert.IsNotNull(firstSequence);
        }
Пример #10
0
        /// <summary>
        /// Parses the file named by _genBankFile_WithREFSEQPrimaryData and checks that every
        /// returned sequence is non-null; the test passes when parsing completes without throwing.
        /// </summary>
        public void TestParsingREFSEQPrimaryHeader()
        {
            // Test parsing Primary header which contains table with header.
            // REFSEQ_SPAN         PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP
            // Dispose the parser so the input file is released, matching the sibling tests that
            // construct a GenBankParser from a file name inside a using block.
            using (GenBankParser parser = new GenBankParser(_genBankFile_WithREFSEQPrimaryData))
            {
                // The result of Parse() was previously discarded. If the parser produces its
                // sequences lazily, nothing would be parsed at all, so enumerate the result.
                foreach (ISequence seq in parser.Parse())
                {
                    Assert.IsNotNull(seq);
                }
            }
        }
Пример #11
0
        /// <summary>
        /// Checks that parsing the file named by _genBankFile_ParseOriginShifted2 yields at least
        /// one sequence.
        /// </summary>
        public void TestGenBankParseOriginShifted2()
        {
            GenBankParser genBankParser = new GenBankParser();

            // FirstOrDefault returns null when nothing was parsed, which the assertion rejects.
            var parsed = genBankParser.Parse(_genBankFile_ParseOriginShifted2);
            ISequence firstSequence = parsed.FirstOrDefault();

            Assert.IsNotNull(firstSequence);
        }
Пример #12
0
        /// <summary>
        /// Checks that parsing the file named by _genBankFile_EmptyOrganismClassificationTest yields
        /// at least one sequence.
        /// </summary>
        public void TestGenBankEmptyOrganismClassification()
        {
            GenBankParser genBankParser = new GenBankParser();

            // FirstOrDefault returns null when nothing was parsed, which the assertion rejects.
            var parsed = genBankParser.Parse(_genBankFile_EmptyOrganismClassificationTest);
            ISequence firstSequence = parsed.FirstOrDefault();

            Assert.IsNotNull(firstSequence);
        }
Пример #13
0
        /// <summary>
        /// Parses the first sequence from FilePath, writes it to a temp file through a
        /// GenBankFormatter constructed on that path, re-parses the temp file and validates
        /// alphabet, ID, locus/version metadata and sequence text against the configured values.
        /// </summary>
        public void GenBankFormatterWithParseValidateWriteFilePath()
        {
            InitializeXmlVariables();
            // parse
            using (ISequenceParser parserObj = new GenBankParser(FilePath))
            {
                IEnumerable<ISequence> seqList = parserObj.Parse();
                ISequence seq          = seqList.ElementAt(0);
                string    tempFileName = System.IO.Path.GetTempFileName();
                try
                {
                    using (ISequenceFormatter formatter = new GenBankFormatter(tempFileName))
                    {
                        formatter.Write(seq);
                        formatter.Close();
                    }

                    // parse
                    // The using block releases the re-parse parser even when an assertion below fails.
                    using (ISequenceParser parserObjFromFile = new GenBankParser(tempFileName))
                    {
                        seqList = parserObjFromFile.Parse();
                        seq     = seqList.ElementAt(0);
                        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                        Assert.AreEqual(SeqId, seq.ID);
                        ApplicationLog.WriteLine(
                            "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                        // test the metadata that is tricky to parse, and will not be tested implicitly by
                        // testing the formatting
                        GenBankMetadata metadata =
                            (GenBankMetadata)seq.Metadata["GenBank"];
                        if (metadata.Locus.Strand != SequenceStrandType.None)
                        {
                            Assert.AreEqual(StrandType,
                                            metadata.Locus.Strand.ToString());
                        }
                        Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                        metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                        Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                        metadata.Locus.Date);
                        Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                        Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                        ApplicationLog.WriteLine(
                            "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                        // test the sequence string
                        Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));

                        ApplicationLog.WriteLine(
                            "GenBank Formatter BVT: Successfully validated the Sequence");
                        Console.WriteLine(string.Format((IFormatProvider)null,
                                                        "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
                                                        ExpectedSequence));
                    }
                }
                finally
                {
                    // Delete the temp file even when a write, parse or assertion fails.
                    File.Delete(tempFileName);
                }
            }
        }
Пример #14
0
        /// <summary>
        /// Parses the file named by _singleProteinSeqGenBankFilename, validates the first sequence's
        /// alphabet, ID, locus/version metadata, reference and feature counts and sequence text,
        /// then formats the sequence to the temp file and compares the output with the expected
        /// text after both pass through Utility.CleanupWhiteSpace.
        /// </summary>
        public void TestGenBankWhenParsingOne()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence       seq    = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

            // test the non-metadata properties
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
            Assert.AreEqual("SCU49845", seq.ID);

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            // NOTE(review): arguments here are in (actual, expected) order, unlike the assertions below.
            Assert.AreEqual(metadata.Locus.Strand, SequenceStrandType.None);
            Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLower(CultureInfo.CurrentCulture));
            Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse("21-JUN-1999", (IFormatProvider)null), metadata.Locus.Date);
            Assert.AreEqual("1", metadata.Version.Version);
            Assert.AreEqual("1293613", metadata.Version.GiNumber);

            // test that we're correctly putting all types of metadata in the right places
            Assert.AreEqual(1, seq.Metadata.Count);
            IList <CitationReference> referenceList = metadata.References;

            Assert.AreEqual(3, referenceList.Count);
            IList <FeatureItem> featureList = metadata.Features.All;

            Assert.AreEqual(6, featureList.Count);
            Assert.AreEqual(4, featureList[0].Qualifiers.Count);
            Assert.AreEqual(5, featureList[1].Qualifiers.Count);
            Assert.AreEqual(1, featureList[2].Qualifiers.Count);

            // test the sequence string
            // NOTE(review): this verbatim literal spans several physical lines here, which would embed
            // line breaks in the expected value — confirm against the original file.
            string expected = @"gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcac
aatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttct
ataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc";

            Assert.AreEqual(expected, new string(seq.Select(a => (char)a).ToArray()));

            // format
            ISequenceFormatter formatter = new GenBankFormatter();

            formatter.Format(seq, TempGenBankFileName);

            string actual = string.Empty;

            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }
            // The temp file is deleted before the final comparison; this line is not reached when an
            // earlier step throws.
            File.Delete(TempGenBankFileName);

            // test the formatting
            Assert.AreEqual(Utility.CleanupWhiteSpace(_singleProteinSeqGenBankFileExpectedOutput),
                            Utility.CleanupWhiteSpace(actual));
        }
Пример #15
0
        /// <summary>
        /// Parses the file named by _multipleSeqGenBankFilename and checks that it yields two
        /// sequences with 105 and 5028 items respectively.
        /// </summary>
        public void TestGenBankWhenParsingMultiple()
        {
            // parse
            ISequenceParser parser = new GenBankParser();

            // Materialise the result once: Count() followed by two ElementAt() calls would
            // otherwise walk the parser output up to three times.
            List<ISequence> seqList = parser.Parse(_multipleSeqGenBankFilename).ToList();

            // Just check the number of items returned and that they're not empty.  The guts
            // are tested in TestGenBankWhenParsingOne.
            Assert.AreEqual(2, seqList.Count);
            Assert.AreEqual(105, seqList[0].Count);
            Assert.AreEqual(5028, seqList[1].Count);
        }
Пример #16
0
        /// <summary>
        /// Parses the file at FilePath and validates the first sequence: read-only flag (only when
        /// configured as "true"), alphabet, molecule type, IDs, locus/version metadata and the
        /// sequence text, logging each validated group.
        /// </summary>
        public void GenBankParserValidateParseFileName()
        {
            InitializeXmlVariables();

            // Parse the file and take the first sequence.
            ISequenceParser genBankParser = new GenBankParser();
            IList<ISequence> parsedSequences = genBankParser.Parse(FilePath);
            ISequence first = parsedSequences[0];

            // The read-only check runs only when the configured flag is exactly "true".
            bool expectReadOnly =
                string.Compare(IsSequenceReadOnly, "true", false, CultureInfo.CurrentCulture) == 0;
            if (expectReadOnly)
            {
                Assert.IsTrue(first.IsReadOnly);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the ReadOnly Property");
            }

            // Non-metadata properties.
            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), first.Alphabet);
            Assert.AreEqual(Utility.GetMoleculeType(MolType), first.MoleculeType);
            Assert.AreEqual(SeqId, first.DisplayID);
            Assert.AreEqual(SeqId, first.ID);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // Locus and version metadata; the strand is compared only when it is not None.
            GenBankMetadata genBankData = (GenBankMetadata)first.Metadata["GenBank"];
            bool hasStrand = genBankData.Locus.Strand != SequenceStrandType.None;
            if (hasStrand)
            {
                Assert.AreEqual(StrandType, genBankData.Locus.Strand.ToString());
            }

            string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
            string actualTopology =
                genBankData.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
            Assert.AreEqual(expectedTopology, actualTopology);
            Assert.AreEqual(Div, genBankData.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankData.Locus.Date);
            Assert.AreEqual(Version, genBankData.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, genBankData.Version.GINumber);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // Sequence text.
            Assert.AreEqual(ExpectedSequence, first.ToString());
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Sequence");
            string completionMessage = string.Format(
                (IFormatProvider)null,
                "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                ExpectedSequence);
            Console.WriteLine(completionMessage);
        }
Пример #17
0
        /// <summary>
        /// Builds a Sequence from the configured expected text plus the ID and GenBank metadata of the
        /// first sequence parsed from FilePath, formats it to a temp file, re-parses that file and
        /// validates alphabet, ID, locus/version metadata and sequence text.
        /// </summary>
        public void GenBankFormatterValidateWriteWithFilePath()
        {
            InitializeXmlVariables();
            ISequenceParser parserObj = new GenBankParser();

            // Fetch the first parsed sequence once; it supplies both the ID and the metadata,
            // so the source file is not parsed twice.
            ISequence sourceSeq = parserObj.Parse(FilePath).ElementAt(0);
            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
            var orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence)
            {
                ID = sourceSeq.ID
            };
            orgSeq.Metadata.Add("GenBank", sourceSeq.Metadata["GenBank"]);

            string tempFileName = Path.GetTempFileName();
            try
            {
                ISequenceFormatter formatter = new GenBankFormatter();
                formatter.Format(orgSeq, tempFileName);

                // parse
                ISequenceParser parserObjFromFile = new GenBankParser();
                ISequence seq = parserObjFromFile.Parse(tempFileName).ElementAt(0);
                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                Assert.AreEqual(SeqId, seq.ID);
                ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                // NOTE(review): the metadata is read from orgSeq (the input), not from the re-parsed
                // seq, so these assertions do not exercise the formatter round-trip — confirm intent.
                var metadata =
                    (GenBankMetadata)orgSeq.Metadata["GenBank"];
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType,
                                    metadata.Locus.Strand.ToString());
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
                ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
            }
            finally
            {
                // Delete the temp file even when formatting, parsing or an assertion fails.
                File.Delete(tempFileName);
            }
        }
Пример #18
0
        /// <summary>
        /// Parses FilePath through a StreamReader handed to the parser and validates the first
        /// sequence's alphabet, ID, locus/version metadata and sequence text, logging each
        /// validated group.
        /// </summary>
        public void GenBankParserValidateParseFileNameWithStream()
        {
            InitializeXmlVariables();
            IEnumerable<ISequence> parsedStream = null;
            List<ISequence> sequences = null;

            using (ISequenceParser streamParser = new GenBankParser())
            {
                // The result is copied into a list while the reader is still open.
                using (StreamReader fileReader = new StreamReader(FilePath))
                {
                    parsedStream = streamParser.Parse(fileReader);
                    sequences = parsedStream.ToList();
                }

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), sequences[0].Alphabet);
                Assert.AreEqual(SeqId, sequences[0].ID);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // Locus and version metadata; the strand is compared only when it is not None.
                GenBankMetadata genBankData = (GenBankMetadata)sequences[0].Metadata["GenBank"];
                bool hasStrand = genBankData.Locus.Strand != SequenceStrandType.None;
                if (hasStrand)
                {
                    Assert.AreEqual(StrandType, genBankData.Locus.Strand.ToString());
                }

                string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
                string actualTopology =
                    genBankData.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
                Assert.AreEqual(expectedTopology, actualTopology);
                Assert.AreEqual(Div, genBankData.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankData.Locus.Date);

                Assert.AreEqual(Version, genBankData.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, genBankData.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // Sequence text, rebuilt by casting each item to a char.
                char[] symbols = sequences[0].Select(a => (char)a).ToArray();
                Assert.AreEqual(ExpectedSequence, new string(symbols));

                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Sequence");
                string completionMessage = string.Format(
                    (IFormatProvider)null,
                    "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                    ExpectedSequence);
                Console.WriteLine(completionMessage);
            }
        }
Пример #19
0
        /// <summary>
        /// Parses the GenBank file configured under Constants.SimpleGeneBankNodeName, checks the
        /// first sequence's text and length against the configured expected sequence, then wraps
        /// it in a BasicDerivedSequence and checks that the derived sequence has the same text.
        /// </summary>
        public void ValidateBasicDerivedSequenceWithGenBankFormat()
        {
            // Expected sequence text and input file path come from the XML configuration.
            string expectedText = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.ExpectedSequenceNode);
            string gbkPath = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.FilePathNode);

            Assert.IsTrue(File.Exists(gbkPath));
            ApplicationLog.WriteLine(
                "Sequence BVT: Sequence BVT: The File exist in the Path " + gbkPath);

            // Parse the file and take the first entry as a concrete Sequence.
            ISequenceParser genBankParser = new GenBankParser();
            IList<ISequence> parsedSequences = genBankParser.Parse(gbkPath);

            Assert.IsNotNull(parsedSequences);
            Sequence original = (Sequence)parsedSequences[0];

            Assert.IsNotNull(original);
            Assert.AreEqual(expectedText, original.ToString());
            ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence is as expected.");

            // Copy the items into a byte buffer and compare its length with the expected text length.
            byte[] encodedCopy = new byte[original.Count];
            IList<byte> byteView = original as IList<byte>;
            byteView.CopyTo(encodedCopy, 0);
            Assert.AreEqual(expectedText.Length, encodedCopy.Length);
            ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence Length is as expected.");

            // Wrap the parsed sequence in a derived sequence.
            BasicDerivedSequence derived =
                new BasicDerivedSequence(original, false, false, -1, -1);

            // The derived sequence must match both the expected text and the original.
            Assert.IsNotNull(derived);
            Assert.AreEqual(expectedText, derived.ToString());
            Assert.AreEqual(original.ToString(), derived.ToString());
            ApplicationLog.WriteLine("Sequence BVT: The BasicDerived Sequence is as expected.");

            // Console output for the test runner.
            Console.WriteLine(
                "Sequence BVT: Validation of GenBank file Sequence is completed successfully.");
        }
Пример #20
0
        /// <summary>
        /// Parses the configured GenBank file, makes the first parsed sequence writable,
        /// appends one item taken from a freshly built "TCGN" sequence, and checks the
        /// resulting text and item count against the values stored in the XML configuration.
        /// </summary>
        public void ValidateSequenceInsertWithGenBankFormat()
        {
            // Expected values and the input file path come from the XML configuration.
            string expectedSequenceCount = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.SimpleFastaSequenceCount);
            string geneBankFilePath = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.FilePathNode);
            string expectedSeqAfterAdd = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.ExpectedSeqAfterAdd);

            string alphabetName = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.AlphabetNameNode);
            string actualSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.ExpectedSingleChar);

            // Logs information to the log file. The format string has a single placeholder,
            // so only one argument is supplied.
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "Sequence BVT: Sequence {0} is expected.", actualSequence));

            // Source of the item that will be appended below (its first element is used).
            Sequence seqItem = new Sequence(Utility.GetAlphabet(alphabetName), "TCGN");

            Assert.IsTrue(File.Exists(geneBankFilePath));

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "Sequence BVT: The File exist in the Path {0}.", geneBankFilePath));

            ISequenceParser parser = new GenBankParser();

            // Parse the GenBank file and take the first sequence.
            IList<ISequence> sequence = parser.Parse(geneBankFilePath);
            Sequence Seq = (Sequence)sequence[0];

            // The sequence is switched to writable before it is modified.
            Seq.IsReadOnly = false;
            string seqBeforeAdding = Seq.ToString();
            Seq.Add(seqItem[0]);

            // Validate the sequence after the item has been appended.
            // Assert arguments are ordered (expected, actual) so failure messages read correctly.
            string seqAfterAdd = Seq.ToString();
            Assert.AreEqual(expectedSeqAfterAdd, seqAfterAdd);
            Assert.AreNotEqual(seqBeforeAdding, seqAfterAdd);
            Assert.AreEqual(expectedSequenceCount, Seq.Count.ToString((IFormatProvider)null));
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "Sequence BVT: Sequence {0} is expected.", seqAfterAdd));
        }
Пример #21
0
        /// <summary>
        /// Parses two GenBank files and checks feature-related metadata: feature counts per
        /// kind, features overlapping given position ranges, and that the translation
        /// returned by the first coding sequence matches its stored Translation value.
        /// </summary>
        public void GenBankFeatures()
        {
            GenBankMetadata metadata;

            // First file: the single-protein GenBank sample. The parser is disposed even if
            // an assertion below fails.
            using (ISequenceParser parser = new GenBankParser(_singleProteinSeqGenBankFilename))
            {
                ISequence seq = parser.Parse().FirstOrDefault();
                metadata = (GenBankMetadata)seq.Metadata["GenBank"];
                List<CodingSequence> CDS = metadata.Features.CodingSequences;

                // Assert arguments are ordered (expected, actual).
                Assert.AreEqual(3, CDS.Count);
                Assert.AreEqual(1, CDS[0].DatabaseCrossReference.Count);
                Assert.AreEqual(string.Empty, CDS[0].GeneSymbol);
                Assert.AreEqual(1, metadata.Features.GetFeatures("source").Count);
                Assert.IsFalse(CDS[0].Pseudo);
                Assert.AreEqual(2, metadata.GetFeatures(1, 109).Count);
                Assert.AreEqual(2, metadata.GetFeatures(1, 10).Count);
                Assert.AreEqual(2, metadata.GetFeatures(10, 100).Count);
                Assert.AreEqual(2, metadata.GetFeatures(120, 150).Count);
                Assert.AreEqual(0, metadata.GetCitationsReferredInFeatures().Count);
                parser.Close();
            }

            // Second file: NC_001284.gbk from the GenBank data directory.
            using (ISequenceParser parser1 = new GenBankParser(_genBankDataPath + @"\NC_001284.gbk"))
            {
                ISequence seq1 = parser1.Parse().FirstOrDefault();

                metadata = (GenBankMetadata)seq1.Metadata["GenBank"];
                Assert.AreEqual(743, metadata.Features.All.Count);
                Assert.AreEqual(117, metadata.Features.CodingSequences.Count);
                Assert.AreEqual(32, metadata.Features.Exons.Count);
                Assert.AreEqual(22, metadata.Features.Introns.Count);
                Assert.AreEqual(60, metadata.Features.Genes.Count);
                Assert.AreEqual(455, metadata.Features.MiscFeatures.Count);
                Assert.AreEqual(17, metadata.Features.Promoters.Count);
                Assert.AreEqual(21, metadata.Features.TransferRNAs.Count);
                Assert.AreEqual(
                    117,
                    metadata.Features.All.FindAll(feature => feature.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);
                Assert.AreEqual(117, metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count);

                // Turn the translated sequence into text by copying its bytes and decoding as ASCII.
                ISequence seqTemp = metadata.Features.CodingSequences[0].GetTranslation();
                byte[] tempData = new byte[seqTemp.Count];
                for (int i = 0; i < seqTemp.Count; i++)
                {
                    tempData[i] = seqTemp[i];
                }
                string sequenceInString = ASCIIEncoding.ASCII.GetString(tempData);

                // Surrounding double quotes are stripped from both sides before comparing.
                Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), sequenceInString.Trim('"'));
                Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
            }
        }
Пример #22
0
        /// <summary>
        /// Negative test helper: configures a GenBankParser for the file stored under
        /// <paramref name="node"/> in an invalid way and expects parsing to throw.
        /// </summary>
        /// <param name="node">XML node name used to look up the file path; it also selects
        /// which invalid setting is applied to the parser.</param>
        void InvalidateGenBankParser(string node)
        {
            // Initialization of xml strings.
            FilePath = utilityObj.xmlUtil.GetTextValue(node,
                                                       Constants.FilePathNode);

            // The failure is raised after the try block. Calling Assert.Fail inside it
            // would be swallowed by the catch-all handler and the test could never fail.
            bool parsedWithoutError = false;

            try
            {
                GenBankParser parserObj = new GenBankParser(FilePath);
                if (string.Equals(Constants.SimpleGenBankNodeName, node))
                {
                    parserObj.LocationBuilder = null;
                }
                else if (string.Equals(Constants.SimpleGenBankPrimaryNode, node))
                {
                    parserObj.Alphabet = Alphabets.RNA;
                }

                // NOTE(review): the result of Parse() is not enumerated here; if Parse() is
                // lazily evaluated this call alone may not trigger parsing - confirm.
                parserObj.Parse();
                parsedWithoutError = true;
            }
            catch (Exception)
            {
                // Any exception type is accepted as the expected outcome; the original code
                // handled InvalidOperationException, InvalidDataException and Exception
                // with identical bodies.
                ApplicationLog.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
                Console.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
            }

            if (parsedWithoutError)
            {
                Assert.Fail("GenBank Parser : Expected an exception but parsing completed without error.");
            }
        }
Пример #23
0
        /// <summary>
        /// Checks that calling the parameterless Parse() on a GenBankParser created with the
        /// default constructor throws an exception.
        /// </summary>
        public void TestGenBankFailureWhenParsingEmpty()
        {
            bool failed = false;

            try
            {
                ISequenceParser parser = new GenBankParser();
                parser.Parse();

                // Only reached when Parse() did not throw.
                failed = true;
            }
            catch (Exception)
            {
                // Expected path: any exception counts as success for this test.
            }

            if (failed)
            {
                // The message names the call actually made here: Parse() on a parser that
                // was constructed without arguments.
                Assert.Fail("Failed to throw exception for calling Parse on a parser created without any input.");
            }
        }
Пример #24
0
        /// <summary>
        /// Smoke test: parses every "*.gbk" file in the GenBank data directory, writes the
        /// parsed sequences to a temporary GenBank file, reads that file back, and logs the
        /// outcome. Nothing is asserted about content; any exception is logged and rethrown.
        /// </summary>
        public void TestGenBankForManyFiles()
        {
            // iterate through the files in input dir, parsing and formatting each; write results
            // to log file
            DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);

            foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);

                // The parser is disposed for every file, including when parsing or formatting fails.
                using (ISequenceParser parser = new GenBankParser(fileInfo.FullName))
                {
                    try
                    {
                        IEnumerable<ISequence> seqList = parser.Parse();

                        // don't do anything with it; just make sure it doesn't crash
                        ISequenceFormatter formatter = new GenBankFormatter(TempGenBankFileName);
                        try
                        {
                            ((GenBankFormatter)formatter).Write(seqList.ToList());
                        }
                        finally
                        {
                            // Close the formatter before the temp file is read back and deleted.
                            formatter.Close();
                        }

                        // Reading the output back only proves the file is readable; the text is discarded.
                        using (StreamReader reader = new StreamReader(TempGenBankFileName))
                        {
                            reader.ReadToEnd();
                        }

                        parser.Close();

                        ApplicationLog.WriteLine("Parse completed successfully." + Environment.NewLine);
                    }
                    catch (Exception e)
                    {
                        ApplicationLog.WriteLine("Parse failed:" + Environment.NewLine + e + Environment.NewLine);
                        throw;
                    }
                    finally
                    {
                        // Remove the temp file on both the success and the failure path.
                        File.Delete(TempGenBankFileName);
                    }
                }
            }
        }
Пример #25
0
        /// <summary>
        /// Parses the file at FilePath with Parse(fileName) and checks the first sequence's
        /// alphabet, ID, selected GenBank locus/version metadata and sequence text against
        /// the expected values loaded by InitializeXmlVariables.
        /// </summary>
        public void GenBankParserValidateParseFileName()
        {
            InitializeXmlVariables();

            // Parse the file and pick the first sequence.
            ISequenceParser parserObj = new GenBankParser();
            IEnumerable<ISequence> parsedSequences = parserObj.Parse(FilePath);
            ISequence firstSequence = parsedSequences.ElementAt(0);

            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
            Assert.AreEqual(SeqId, firstSequence.ID);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // Metadata that is tricky to parse and is not covered implicitly by the
            // formatting tests.
            GenBankMetadata genBankMetadata = (GenBankMetadata)firstSequence.Metadata["GenBank"];

            // The strand type is only compared when the parsed locus reports one.
            bool hasStrand = genBankMetadata.Locus.Strand != SequenceStrandType.None;
            if (hasStrand)
            {
                Assert.AreEqual(StrandType, genBankMetadata.Locus.Strand.ToString());
            }

            // Strand topology is compared case-insensitively by upper-casing both sides.
            string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
            string actualTopology =
                genBankMetadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
            Assert.AreEqual(expectedTopology, actualTopology);

            Assert.AreEqual(Div, genBankMetadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankMetadata.Locus.Date);

            Assert.AreEqual(Version, genBankMetadata.Version.Version.ToString(null));
            Assert.AreEqual(PrimaryId, genBankMetadata.Version.GiNumber);
            ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // Finally compare the sequence text itself.
            Assert.AreEqual(ExpectedSequence, firstSequence.ConvertToString());

            ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the Sequence");
        }
Пример #26
0
        /// <summary>
        /// Round-trip test: parses the GenBank file with multiple DBLINK lines, formats the
        /// first sequence to a temporary file, and checks via Utility.CompareFiles that the
        /// written file matches the input file.
        /// </summary>
        public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
        {
            // Path.GetTempFileName creates the file on disk, so it is removed in a finally
            // block to avoid leaving it behind when parsing, formatting or the assert fails.
            string tempFileName = Path.GetTempFileName();

            try
            {
                ISequenceParser parser1 = new GenBankParser();

                using (parser1.Open(_genBankFile_WithMultipleDBLines))
                {
                    var orgSeq = parser1.Parse().First();
                    ISequenceFormatter formatter = new GenBankFormatter();
                    using (formatter.Open(tempFileName))
                    {
                        formatter.Format(orgSeq);
                        formatter.Close();
                    }
                }

                // Compare only after both the parser and the formatter scopes have ended.
                var same = Utility.CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);

                Assert.IsTrue(same);
                ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
            }
            finally
            {
                File.Delete(tempFileName);
            }
        }
Пример #27
0
        /// <summary>
        /// Sets the parser alphabet to Protein, parses the single-DNA-sequence GenBank file
        /// and expects an InvalidDataException once the first sequence is requested.
        /// </summary>
        public void TestGenBankWhenUserSetsIncorrectAlphabet()
        {
            ISequenceParser parser = new GenBankParser();
            parser.Alphabet = Alphabets.Protein;

            bool threwInvalidData = false;

            try
            {
                // Requesting the first element forces the parse result to be evaluated.
                var parsedSequences = parser.Parse(_singleDnaSeqGenBankFilename);
                parsedSequences.ElementAt(0);
            }
            catch (InvalidDataException)
            {
                // Expected outcome; any other exception type propagates to the caller.
                threwInvalidData = true;
            }

            if (!threwInvalidData)
            {
                Assert.Fail("Failed to throw exception for trying to create sequence using incorrect alphabet.");
            }
        }
Пример #28
0
        /// <summary>
        /// Builds a Sequence from the expected text plus the GenBank metadata, ID, DisplayID
        /// and MoleculeType of the parsed input file, formats it to a temporary file through
        /// a TextWriter, parses that file again and checks the round-tripped properties,
        /// locus/version metadata and sequence text.
        /// </summary>
        public void GenBankFormatterValidateFormatTextWriter()
        {
            InitializeXmlVariables();

            // Parse the input file; its metadata and identifiers are copied onto a new sequence
            // instead of formatting the parsed sequence directly.
            ISequenceParser parser1 = new GenBankParser();
            IList<ISequence> seqList1 = parser1.Parse(FilePath);

            // The expected text is stripped of carriage returns, line feeds and spaces first.
            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
            Sequence orgSeq =
                new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);

            orgSeq.Metadata.Add("GenBank",
                                (GenBankMetadata)seqList1[0].Metadata["GenBank"]);
            orgSeq.ID = seqList1[0].ID;
            orgSeq.DisplayID = seqList1[0].DisplayID;
            orgSeq.MoleculeType = seqList1[0].MoleculeType;

            ISequenceFormatter formatter = new GenBankFormatter();

            // The temp file is removed in the finally block so that a failing assertion
            // does not leave it behind.
            try
            {
                using (TextWriter writer =
                           new StreamWriter(Constants.GenBankTempFileName))
                {
                    formatter.Format(orgSeq, writer);
                }

                // parse the file that was just written
                GenBankParser parserObj = new GenBankParser();
                IList<ISequence> seqList = parserObj.Parse(Constants.GenBankTempFileName);

                ISequence seq = seqList[0];

                // test the non-metadata properties; read-only is only checked when the
                // configuration value is exactly "true"
                if (0 == string.Compare(IsSequenceReadOnly, "true",
                                        false, CultureInfo.CurrentCulture))
                {
                    Assert.IsTrue(seq.IsReadOnly);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the ReadOnly Property");
                }

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                Assert.AreEqual(Utility.GetMoleculeType(MolType), seq.MoleculeType);
                Assert.AreEqual(SeqId, seq.DisplayID);
                Assert.AreEqual(SeqId, seq.ID);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

                // The strand type is only compared when the parsed locus reports one.
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture), metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, seq.ToString());
                ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
                Console.WriteLine(string.Format((IFormatProvider)null, "GenBank Formatter BVT: Successfully validated the Sequence '{0}'", ExpectedSequence));
            }
            finally
            {
                File.Delete(Constants.GenBankTempFileName);
            }
        }
Пример #29
0
        /// <summary>
        /// Loads the expected values stored under the primary GenBank XML node, parses the
        /// configured file with the parser alphabet set to Protein, and checks the first
        /// sequence's alphabet, ID, locus/version metadata and sequence text.
        /// </summary>
        public void GenBankParserValidateParseOneWithSpecificFormats()
        {
            InitializeXmlVariables();

            // Initialization of xml strings: every value below is read from the same node.
            var primaryNode = Constants.SimpleGenBankPrimaryNode;
            FilePath = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.FilePathNode);
            AlphabetName = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.AlphabetNameNode);
            SeqId = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.SequenceIdNode);
            StrandTopology = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.StrandTopologyNode);
            StrandType = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.StrandTypeNode);
            Div = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.DivisionNode);
            Version = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.VersionNode);
            SequenceDate = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.DateNode);
            PrimaryId = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.PrimaryIdNode);
            ExpectedSequence = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.ExpectedSequenceNode);

            // parse
            using (ISequenceParser parserObj = new GenBankParser(FilePath))
            {
                parserObj.Alphabet = Alphabets.Protein;
                IEnumerable<ISequence> seq = parserObj.Parse();

                // Fetch the first sequence once instead of calling ElementAt(0) for every
                // assertion, which would enumerate the parse result repeatedly.
                ISequence firstSeq = seq.ElementAt(0);

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName),
                                firstSeq.Alphabet);
                Assert.AreEqual(SeqId, firstSeq.ID);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                GenBankMetadata metadata = (GenBankMetadata)firstSeq.Metadata["GenBank"];

                // The strand type is only compared when the parsed locus reports one.
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType,
                                    metadata.Locus.Strand.ToString());
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                metadata.Locus.StrandTopology.ToString().ToUpper(
                                    CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string: each item of the sequence is converted to a char
                Assert.AreEqual(ExpectedSequence, new string(firstSeq.Select(a => (char)a).ToArray()));
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Sequence");
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                                ExpectedSequence));
            }
        }