/// <summary>
        /// Parses a GenBank file with the MBF library and converts every parsed
        /// sequence to its BioPatML counterpart.
        /// </summary>
        /// <param name="genbankFileURL">Path of the GenBank file to parse.</param>
        /// <returns>
        /// A <see cref="SequenceList"/> holding one converted entry per sequence
        /// returned by the parser, in parser order.
        /// </returns>
        /// <exception cref="NotImplementedException">
        /// Thrown when <c>IsOnline</c> is true; online reading is not supported.
        /// </exception>
        private SequenceList ParseSequencePath
                                    (string genbankFileURL)
        {
            if (IsOnline)
            {
                throw new NotImplementedException
                    ("online genbank reading is not supported in this version!");
            }

            // Create the parser first
            ISequenceParser gbParser = new GenBankParser();

            // Always try parsing multiple sequences from the file
            List<ISequence> mbfSequences = gbParser.Parse(genbankFileURL);

            SequenceList bioSeqList = new SequenceList();

            foreach (Sequence mbfseq in mbfSequences)
            {
                // Convert each sequence exactly once and keep the result.
                bioSeqList.Add(ConvertToBioPatMLSeq(mbfseq));
            }

            return bioSeqList;
        }
Exemple #2
0
        /// <summary>
        /// Opens the prokaryote_schema database connection, calls
        /// <c>extract_newbacterias</c>, waits for a key press, then parses every path in
        /// <c>newbacteriaspath</c> as a GenBank file and passes each parsed sequence to
        /// <c>parse_to_database</c>. Progress and the final outcome are written to the console.
        /// </summary>
        /// <remarks>
        /// Any exception is caught and reported on the console; the method itself does not throw
        /// for parsing or database failures.
        /// </remarks>
        private static void initiate_Updater()
        {
            // 1-based index of the file currently being processed; after a complete run it
            // equals newbacteriaspath.Count + 1, which the finally block relies on.
            int i = 1;

            Console.WriteLine("Connecting to database prokaryote_schema...");
            try
            {
                // NOTE(review): credentials are hard-coded in source, and the connection is never
                // closed in this method - confirm whether callers still need it open afterwards.
                conn = new MySqlConnection("Server=127.0.0.1;Database=prokaryote_schema;Uid=root;Pwd=Anitar@n@");
                conn.Open();
                Console.WriteLine("Connected to database prokaryote_schema\n");

                newbacteriaspath = new List<string>();
                extract_newbacterias(); // presumably fills newbacteriaspath - verify against its definition
                Console.WriteLine("\nNumber of bacterias to be parsed to database: {0}\n", newbacteriaspath.Count);
                Console.WriteLine("Press any key to initiate the parsing");
                Console.ReadKey();
                Console.WriteLine("Loading newbacterias to database...");

                foreach (var bacteria in newbacteriaspath)
                {
                    Console.WriteLine("Parsing .gbff file [{0}/{1}]", i, newbacteriaspath.Count);

                    // Materialise the sequences while the file is open so it can be released
                    // before the database work starts.
                    List<ISequence> sequences;
                    ISequenceParser parser = new Bio.IO.GenBank.GenBankParser();
                    using (parser.Open(bacteria))
                    {
                        sequences = parser.Parse().ToList();
                    }

                    int j = 1;
                    foreach (var sequence in sequences)
                    {
                        Console.WriteLine("\tParsingSequence [{0}/{1}]", j, sequences.Count);
                        parse_to_database(sequence);
                        j++;
                    }

                    Console.WriteLine();
                    append_new_path(bacteria);
                    i++;
                    prokaryote_id = 0;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("\nError occured: " + ex.Message);
            }
            finally
            {
                // newbacteriaspath is only assigned after the connection opens, so it can still
                // be null here when connecting failed; treat that as "not updated" instead of
                // throwing a NullReferenceException out of the finally block.
                bool allFilesProcessed = newbacteriaspath != null
                                         && newbacteriaspath.Count != 0
                                         && i - 1 == newbacteriaspath.Count;

                if (allFilesProcessed)
                {
                    Console.WriteLine("\nDatabase Updated Successfully");
                }
                else
                {
                    Console.WriteLine("\nDatabase is not Updated");
                }
            }
        }
        // Parses the GenBank (.gbff) file at this.path and stores the resulting
        // sequences in this.sequences.
        private void extract_sequences()
        {
            ISequenceParser gbffParser = new Bio.IO.GenBank.GenBankParser();

            // The list is built inside the using block, i.e. before the opened file is disposed.
            using (gbffParser.Open(this.path))
            {
                var parsedSequences = gbffParser.Parse().ToList();
                this.sequences = parsedSequences;
            }
        }
        /// <summary>
        /// Parses GenBank data from a text reader (a <see cref="StringReader"/> works as well)
        /// and converts every parsed sequence to its BioPatML counterpart.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of the GenBank data.</param>
        /// <returns>
        /// A <see cref="SequenceList"/> holding one converted entry per sequence
        /// returned by the parser, in parser order.
        /// </returns>
        private SequenceList ParseSequencePath
                                    (TextReader reader)
        {
            // Create the parser first
            ISequenceParser gbParser = new GenBankParser();

            // Always try parsing multiple sequences from the reader
            List<ISequence> mbfSequences = gbParser.Parse(reader);

            SequenceList bioSeqList = new SequenceList();

            foreach (Sequence mbfseq in mbfSequences)
            {
                // Convert each sequence exactly once and keep the result.
                bioSeqList.Add(ConvertToBioPatMLSeq(mbfseq));
            }

            return bioSeqList;
        }
Exemple #5
0
        /// <summary>
        /// Parses the GenBank file at <c>FilePath</c> and checks the first sequence's alphabet,
        /// ID, locus metadata, version metadata and sequence text against the class-level
        /// expectation fields.
        /// </summary>
        public void GenBankParserValidateParseFileName()
        {
            InitializeXmlVariables();

            // Parse the file and take the first sequence it yields.
            ISequenceParser genBankParser = new GenBankParser();
            ISequence firstSequence = genBankParser.Parse(FilePath).ElementAt(0);

            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
            Assert.AreEqual(SeqId, firstSequence.ID);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // Metadata that is tricky to parse and is not covered implicitly by the
            // formatting tests.
            var genBankMetadata = (GenBankMetadata) firstSequence.Metadata["GenBank"];

            // The strand type is only compared when the parsed locus actually carries one.
            if (genBankMetadata.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType, genBankMetadata.Locus.Strand.ToString());
            }

            string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
            string parsedTopology =
                genBankMetadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
            Assert.AreEqual(expectedTopology, parsedTopology);

            Assert.AreEqual(Div, genBankMetadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankMetadata.Locus.Date);

            Assert.AreEqual(Version, genBankMetadata.Version.Version.ToString(null));
            Assert.AreEqual(PrimaryId, genBankMetadata.Version.GiNumber);
            ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // Finally compare the sequence text itself.
            Assert.AreEqual(ExpectedSequence, firstSequence.ConvertToString());

            ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the Sequence");
        }
Exemple #6
0
 /// <summary>
 /// Reads the first sequence of the multi-DBLINK GenBank sample, writes it to a temporary
 /// file and asserts that <c>CompareFiles</c> reports the written file and the sample as the same.
 /// </summary>
 public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
 {
     // Path.GetTempFileName creates the file on disk, so it is removed in a finally
     // block to avoid leaving it behind when parsing, formatting or the comparison fails.
     string tempFileName = Path.GetTempFileName();
     try
     {
         ISequenceParser parser1 = new GenBankParser();
         using (parser1.Open(_genBankFile_WithMultipleDBLines))
         {
             var orgSeq = parser1.Parse().First();
             ISequenceFormatter formatter = new GenBankFormatter();
             using (formatter.Open(tempFileName))
             {
                 formatter.Format(orgSeq);
                 formatter.Close();
             }
         }

         var same = CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);
         Assert.IsTrue(same);
     }
     finally
     {
         File.Delete(tempFileName);
     }

     ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
 }
Exemple #7
0
        /// <summary>
        /// Checks the feature collections exposed by <c>GenBankMetadata</c> for two sample
        /// files: the single-protein sample and NC_001284.gbk from the GenBank data directory.
        /// </summary>
        public void GenBankFeatures()
        {
            // parse
            ISequence seq = new GenBankParser()
                .Parse(_singleProteinSeqGenBankFilename)
                .FirstOrDefault();
            Assert.IsNotNull(seq);

            GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;
            Assert.IsNotNull(metadata);

            // Expected values go first so that failure messages label them correctly.
            List<CodingSequence> codingSequences = metadata.Features.CodingSequences;
            Assert.AreEqual(3, codingSequences.Count);
            Assert.AreEqual(1, codingSequences[0].DatabaseCrossReference.Count);
            Assert.AreEqual(string.Empty, codingSequences[0].GeneSymbol);
            Assert.AreEqual(1, metadata.Features.GetFeatures("source").Count);
            Assert.IsFalse(codingSequences[0].Pseudo);
            Assert.AreEqual(2, metadata.GetFeatures(1, 109).Count);
            Assert.AreEqual(2, metadata.GetFeatures(1, 10).Count);
            Assert.AreEqual(2, metadata.GetFeatures(10, 100).Count);
            Assert.AreEqual(2, metadata.GetFeatures(120, 150).Count);
            Assert.AreEqual(0, metadata.GetCitationsReferredInFeatures().Count);

            // Path.Combine avoids hard-coding the directory separator.
            ISequence seq1 = new GenBankParser()
                .Parse(System.IO.Path.Combine(_genBankDataPath, "NC_001284.gbk"))
                .FirstOrDefault();
            Assert.IsNotNull(seq1);

            metadata = seq1.Metadata["GenBank"] as GenBankMetadata;
            Assert.IsNotNull(metadata);
            Assert.AreEqual(743, metadata.Features.All.Count);
            Assert.AreEqual(117, metadata.Features.CodingSequences.Count);
            Assert.AreEqual(32, metadata.Features.Exons.Count);
            Assert.AreEqual(22, metadata.Features.Introns.Count);
            Assert.AreEqual(60, metadata.Features.Genes.Count);
            Assert.AreEqual(455, metadata.Features.MiscFeatures.Count);
            Assert.AreEqual(17, metadata.Features.Promoters.Count);
            Assert.AreEqual(21, metadata.Features.TransferRNAs.Count);
            Assert.AreEqual(117, metadata.Features.All.FindAll(feature => feature.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);
            Assert.AreEqual(117, metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count);

            // The translated sequence, decoded as ASCII, must match the /translation
            // qualifier once surrounding quotes are trimmed from both.
            ISequence translated = metadata.Features.CodingSequences[0].GetTranslation();
            byte[] translatedBytes = translated.ToArray();
            string sequenceInString = Encoding.ASCII.GetString(translatedBytes);
            Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), sequenceInString.Trim('"'));
            Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
        }
Exemple #8
0
        /// <summary>
        /// Round-trips every *.gbk file in the GenBank data directory: each file is parsed,
        /// formatted to the temporary GenBank file, read back and the temporary file deleted.
        /// Nothing is asserted on the text that is read back.
        /// </summary>
        public void TestGenBankForManyFiles()
        {
            FileInfo[] gbkFiles = new DirectoryInfo(_genBankDataPath).GetFiles("*.gbk");

            foreach (FileInfo gbkFile in gbkFiles)
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", gbkFile.FullName, Environment.NewLine);

                IEnumerable<ISequence> parsedSequences = new GenBankParser().Parse(gbkFile.FullName);

                // The sequences are materialised only after the output file has been opened.
                ISequenceFormatter genBankFormatter = new GenBankFormatter();
                using (genBankFormatter.Open(TempGenBankFileName))
                {
                    List<ISequence> sequencesToWrite = parsedSequences.ToList();
                    ((GenBankFormatter) genBankFormatter).Format(sequencesToWrite);
                }

                // Read the formatted output back in full.
                using (var formattedReader = new StreamReader(TempGenBankFileName))
                {
                    string formattedText = formattedReader.ReadToEnd();
                }

                File.Delete(TempGenBankFileName);
            }
        }
Exemple #9
0
        /// <summary>
        /// Parses the single-DNA-sequence sample with the parser alphabet set to DNA, formats
        /// the result to the temporary GenBank file and compares the text, with spaces removed,
        /// against the expected output.
        /// </summary>
        public void TestGenBankWhenUserSetsDnaAlphabet()
        {
            // Set the DNA alphabet on the parser before reading the sample.
            ISequenceParser dnaParser = new GenBankParser();
            dnaParser.Alphabet = Alphabets.DNA;
            ISequence parsedSequence = dnaParser.Parse(_singleDnaSeqGenBankFilename).FirstOrDefault();
            Assert.AreEqual(Alphabets.DNA, parsedSequence.Alphabet);

            // Write the sequence back out.
            ISequenceFormatter genBankFormatter = new GenBankFormatter();
            genBankFormatter.Format(parsedSequence, TempGenBankFileName);

            // Read the output and remove the temporary file before asserting.
            string formattedText;
            using (var outputReader = new StreamReader(TempGenBankFileName))
            {
                formattedText = outputReader.ReadToEnd();
            }
            File.Delete(TempGenBankFileName);

            // Spaces are ignored on both sides; "\r\n" in the expected text is mapped to
            // the platform newline.
            string expectedText = _singleDnaSeqGenBankFileExpectedOutput
                .Replace(" ", "")
                .Replace("\r\n", Environment.NewLine);
            Assert.AreEqual(expectedText, formattedText.Replace(" ", ""));
        }
Exemple #10
0
        /// <summary>
        /// Parses the single-protein-sequence GenBank sample, checks the first sequence's
        /// alphabet, ID, locus/version metadata, reference and feature counts and sequence
        /// text, then formats it to the temporary GenBank file and compares the output, with
        /// spaces removed, against the expected text.
        /// </summary>
        public void TestGenBankWhenParsingOne()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence seq = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

            // test the non-metadata properties
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
            Assert.AreEqual("SCU49845", seq.ID);

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            Assert.AreEqual(metadata.Locus.Strand, SequenceStrandType.None);
            Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLower(CultureInfo.CurrentCulture));
            Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse("21-JUN-1999", (IFormatProvider)null), metadata.Locus.Date);
            Assert.AreEqual("1", metadata.Version.Version);
            Assert.AreEqual("1293613", metadata.Version.GiNumber);

            // test that we're correctly putting all types of metadata in the right places
            Assert.AreEqual(1, seq.Metadata.Count);
            IList<CitationReference> referenceList = metadata.References;
            Assert.AreEqual(3, referenceList.Count);
            IList<FeatureItem> featureList = metadata.Features.All;
            Assert.AreEqual(6, featureList.Count);
            Assert.AreEqual(4, featureList[0].Qualifiers.Count);
            Assert.AreEqual(5, featureList[1].Qualifiers.Count);
            Assert.AreEqual(1, featureList[2].Qualifiers.Count);

            // test the sequence string
            // NOTE(review): this verbatim literal continues over the following physical lines,
            // so the line breaks inside it are part of the expected value - confirm that is intended.
            string expected = @"gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcac
aatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttct
ataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc";
            Assert.AreEqual(expected, new string(seq.Select(a => (char)a).ToArray()));

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            formatter.Format(seq, TempGenBankFileName);

            // read the formatted output back; the temporary file is deleted before asserting
            string actual = string.Empty;
            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }
            File.Delete(TempGenBankFileName);

            // test the formatting (spaces are ignored on both sides; "\r\n" in the expected
            // text is mapped to the platform newline)
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine), actual.Replace(" ", ""));
        }
Exemple #11
0
 /// <summary>
 /// Verifies that parsing the "origin shifted 2" GenBank sample yields at least one sequence.
 /// </summary>
 public void TestGenBankParseOriginShifted2()
 {
     // Only the first parsed sequence is inspected; it must not be null.
     ISequence firstSequence = new GenBankParser()
         .Parse(_genBankFile_ParseOriginShifted2)
         .FirstOrDefault();

     Assert.IsNotNull(firstSequence);
 }
Exemple #12
0
 /// <summary>
 /// Verifies that parsing the empty-organism-classification GenBank sample yields at least
 /// one sequence.
 /// </summary>
 public void TestGenBankEmptyOrganismClassification()
 {
     // Only the first parsed sequence is inspected; it must not be null.
     ISequence firstSequence = new GenBankParser()
         .Parse(_genBankFile_EmptyOrganismClassificationTest)
         .FirstOrDefault();

     Assert.IsNotNull(firstSequence);
 }
        /// <summary>
        ///     Checks that <c>GenBankMetadata.GetFeatures</c> returns the expected number of
        ///     features for two ranges whose bounds and expected counts are read from the XML node.
        /// </summary>
        /// <param name="nodeName">XML node holding the file path, range bounds and expected counts.</param>
        /// <param name="methodName">
        ///     "Accession" (compared ignoring case) selects the overload that also takes the accession
        ///     of the first coding sequence; any other value selects the range-only overload.
        /// </param>
        private void ValidateGetFeatures(string nodeName, string methodName)
        {
            // Read the test inputs from the XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string firstRangeStartText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstRangeStartPoint);
            string secondRangeStartText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondRangeStartPoint);
            string firstRangeEndText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstRangeEndPoint);
            string secondRangeEndText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondRangeEndPoint);
            string expectedSecondRangeCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FeaturesWithinSecondRange);
            string expectedFirstRangeCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FeaturesWithinFirstRange);

            // Parse the GenBank file and pick up the metadata of its first sequence.
            ISequenceParser genBankParser = new GenBankParser();
            IEnumerable<ISequence> parsedSequences = genBankParser.Parse(filePath);
            var metadata = parsedSequences.ElementAt(0).Metadata[Constants.GenBank] as GenBankMetadata;
            List<CodingSequence> codingSequences = metadata.Features.CodingSequences;
            string accession = codingSequences[0].Location.Accession;

            bool queryByAccession = string.Compare(
                methodName, "Accession", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase) == 0;

            // First range: convert the bounds, query, then compare the count as text.
            int firstStart = Convert.ToInt32(firstRangeStartText, null);
            int firstEnd = Convert.ToInt32(firstRangeEndText, null);
            var firstRangeCount = queryByAccession
                ? metadata.GetFeatures(accession, firstStart, firstEnd).Count
                : metadata.GetFeatures(firstStart, firstEnd).Count;
            Assert.AreEqual(firstRangeCount.ToString((IFormatProvider) null), expectedFirstRangeCount);

            // Second range: handled only after the first assertion has passed.
            int secondStart = Convert.ToInt32(secondRangeStartText, null);
            int secondEnd = Convert.ToInt32(secondRangeEndText, null);
            var secondRangeCount = queryByAccession
                ? metadata.GetFeatures(accession, secondStart, secondEnd).Count
                : metadata.GetFeatures(secondStart, secondEnd).Count;
            Assert.AreEqual(secondRangeCount.ToString((IFormatProvider) null), expectedSecondRangeCount);
        }
Exemple #14
0
        /// <summary>
        /// Builds a sequence from <c>ExpectedSequence</c> plus the GenBank metadata and ID of the
        /// first sequence in <c>FilePath</c>, writes it to a temporary file with
        /// <c>GenBankFormatter</c>, parses that file again and validates alphabet, ID, locus and
        /// version metadata and the sequence text.
        /// </summary>
        public void GenBankFormatterValidateWriteUsingStream()
        {
            InitializeXmlVariables();

            // Take the first source sequence once instead of calling ElementAt(0) on the
            // parse result for every property that is read from it.
            ISequenceParser parser1 = new GenBankParser();
            ISequence sourceSeq = parser1.Parse(FilePath).ElementAt(0);

            // Create a Sequence with all attributes: expected residues plus the source
            // sequence's GenBank metadata and ID.
            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
            var orgSeq =
                new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
            orgSeq.Metadata.Add("GenBank", sourceSeq.Metadata["GenBank"]);
            orgSeq.ID = sourceSeq.ID;

            // Path.GetTempFileName creates the file on disk; the finally block removes it
            // even when formatting, parsing or an assertion fails.
            string tempFileName = Path.GetTempFileName();
            try
            {
                ISequenceFormatter formatter = new GenBankFormatter();
                using (formatter.Open(tempFileName))
                {
                    formatter.Format(orgSeq);
                }

                var parserObj = new GenBankParser();
                ISequence seq = parserObj.Parse(tempFileName).ElementAt(0);
                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                Assert.AreEqual(SeqId, seq.ID);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                var metadata = (GenBankMetadata) seq.Metadata["GenBank"];
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                }

                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char) a).ToArray()));
                ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
            }
            finally
            {
                File.Delete(tempFileName);
            }
        }
Exemple #15
0
        /// <summary>
        /// Loads the expectations of the simple GenBank primary node from XML, parses
        /// <c>FilePath</c> with the parser alphabet set to Protein and validates the first
        /// sequence's alphabet, ID, locus and version metadata and sequence text.
        /// </summary>
        public void GenBankParserValidateParseOneWithSpecificFormats()
        {
            InitializeXmlVariables();

            // Initialization of xml strings; every value comes from the same XML node.
            var primaryNode = Constants.SimpleGenBankPrimaryNode;
            FilePath = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.FilePathNode);
            AlphabetName = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.AlphabetNameNode);
            SeqId = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.SequenceIdNode);
            StrandTopology = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.StrandTopologyNode);
            StrandType = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.StrandTypeNode);
            Div = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.DivisionNode);
            Version = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.VersionNode);
            SequenceDate = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.DateNode);
            PrimaryId = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.PrimaryIdNode);
            ExpectedSequence = utilityObj.xmlUtil.GetTextValue(primaryNode, Constants.ExpectedSequenceNode);

            // parse
            ISequenceParser parserObj = new GenBankParser();
            parserObj.Alphabet = Alphabets.Protein;

            // Take the first sequence once instead of calling ElementAt(0) on the parse
            // result for every assertion.
            ISequence firstSeq = parserObj.Parse(FilePath).ElementAt(0);

            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSeq.Alphabet);
            Assert.AreEqual(SeqId, firstSeq.ID);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            var metadata = (GenBankMetadata) firstSeq.Metadata["GenBank"];
            if (metadata.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType,
                                metadata.Locus.Strand.ToString());
            }
            Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                            metadata.Locus.StrandTopology.ToString().ToUpper(
                                CultureInfo.CurrentCulture));
            Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                            metadata.Locus.Date);
            Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
            Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // test the sequence string
            Assert.AreEqual(ExpectedSequence, new string(firstSeq.Select(a => (char) a).ToArray()));
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Sequence");
        }
Exemple #16
0
        /// <summary>
        /// Validates GenBank Formatter for General test cases: a sequence is built from
        /// the expected sequence text plus the metadata and ID of the first parsed
        /// sequence, formatted to a temporary file, parsed back and verified.
        /// </summary>
        /// <param name="seqList1">Parsed sequence list; only its first element is used.</param>
        private static void ValidateWriteGeneralTestCases(IEnumerable<ISequence> seqList1)
        {
            // Create a Sequence with all attributes.
            // Parse and update the properties instead of parsing entire file.
            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
            Sequence orgSeq =
                new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);

            // Take the first element once: every ElementAt call re-enumerates the source.
            ISequence sourceSeq = seqList1.ElementAt(0);
            orgSeq.Metadata.Add("GenBank", (GenBankMetadata)sourceSeq.Metadata["GenBank"]);
            orgSeq.ID = sourceSeq.ID;

            string tempFileName = System.IO.Path.GetTempFileName();
            try
            {
                ISequenceFormatter formatter = new GenBankFormatter();
                formatter.Format(orgSeq, tempFileName);

                // parse
                GenBankParser parserObj = new GenBankParser();
                IEnumerable<ISequence> seqList = parserObj.Parse(tempFileName);
                ISequence seq = seqList.ElementAt(0);

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                Assert.AreEqual(SeqId, seq.ID);
                ApplicationLog.WriteLine(
                    "GenBank Formatter P1: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

                // NOTE(review): the topology assertion was guarded by the Strand check as well
                // (the same condition appeared twice); presumably a StrandTopology check was
                // intended for it - confirm before changing the guard.
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                    Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                        metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                }

                if (metadata.Locus.DivisionCode != SequenceDivisionCode.None)
                {
                    Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                }

                Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);

                // Version and GI number are only verified when the alphabet is not RNA.
                if (0 != string.Compare(AlphabetName, "rna",
                     CultureInfo.CurrentCulture, CompareOptions.IgnoreCase))
                {
                    Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");
                }
                else
                {
                    ApplicationLog.WriteLine(
                        "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date Properties");
                }

                // Compare the sequences with line breaks and blanks removed, ignoring case.
                string truncatedExpectedSequence =
                    expectedUpdatedSequence.ToUpper(CultureInfo.CurrentCulture);
                string truncatedActualSequence =
                    new string(seq.Select(a => (char)a).ToArray())
                        .Replace("\r", "").Replace("\n", "").Replace(" ", "")
                        .ToUpper(CultureInfo.CurrentCulture);

                // test the sequence string
                Assert.AreEqual(truncatedExpectedSequence, truncatedActualSequence);
                ApplicationLog.WriteLine(
                    "GenBank Formatter P1: Successfully validated the Sequence");
            }
            finally
            {
                // Remove the temporary file even when formatting, parsing or an assertion fails.
                File.Delete(tempFileName);
            }
        }
Exemple #17
0
 /// <summary>
 /// Parses the file referenced by _genBankFile_LocusTokenParserTest and checks
 /// that at least one sequence comes back.
 /// </summary>
 public void TestGenBankLocusTokenParser()
 {
     var locusParser = new GenBankParser();

     // FirstOrDefault returns null for an empty result, so a non-null value
     // means the parser produced a sequence.
     ISequence firstSequence = locusParser
         .Parse(_genBankFile_LocusTokenParserTest)
         .FirstOrDefault();

     Assert.IsNotNull(firstSequence);
 }
        /// <summary>
        ///     Validate GenBank Citation referenced by passing featureItem present in GenBank Metadata.
        ///     The number of citations referred to by the first feature of the first parsed
        ///     sequence is compared with the expected count read from the XML node.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateCitationReferencedUsingFeatureItem(string nodeName)
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string expectedCitationReferenced = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.citationReferencedCount);

            // Parse a GenBank file.
            ISequenceParser parserObj = new GenBankParser();
            IEnumerable<ISequence> seq = parserObj.Parse(filePath);

            // Direct cast: a wrong metadata type fails here with a clear InvalidCastException
            // instead of a NullReferenceException on the next line.
            var metadata = (GenBankMetadata)seq.ElementAt(0).Metadata[Constants.GenBank];
            IList<FeatureItem> featureList = metadata.Features.All;

            // Get a list citationReferenced present in GenBank file.
            List<CitationReference> citationReferenceList =
                metadata.GetCitationsReferredInFeature(featureList[0]);

            // Expected value first so that failure messages label the values correctly.
            Assert.AreEqual(expectedCitationReferenced,
                            citationReferenceList.Count.ToString((IFormatProvider)null));
        }
Exemple #19
0
 /// <summary>
 /// Parses the file referenced by _genBankFile_ParseVersionEmpty and checks
 /// that at least one sequence comes back.
 /// </summary>
 public void TestGenBankParseVersionEmpty()
 {
     var versionParser = new GenBankParser();

     // FirstOrDefault returns null for an empty result, so a non-null value
     // means the parser produced a sequence.
     ISequence firstSequence = versionParser
         .Parse(_genBankFile_ParseVersionEmpty)
         .FirstOrDefault();

     Assert.IsNotNull(firstSequence);
 }
        /// <summary>
        ///     Validate All qualifiers in CDS feature.
        ///     Only the first coding sequence of the first parsed sequence is inspected.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="methodName">
        ///     When equal to "DNA" or "RNA" (case-insensitive) the product and codon start
        ///     qualifiers are validated in addition to the common qualifiers.
        /// </param>
        private void ValidateCDSQualifiers(string nodeName, string methodName)
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string expectedCDSProduct = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSProductQualifier);
            string expectedCDSException = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSException);
            string expectedCDSCodonStart = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSCodonStart);
            string expectedCDSLabel = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSLabel);
            string expectedCDSDBReference = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSDBReference);
            string expectedGeneSymbol = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.GeneSymbol);

            // Parse a GenBank file.
            ISequenceParser parserObj = new GenBankParser();
            IEnumerable<ISequence> seq = parserObj.Parse(filePath);

            // Direct cast: a wrong metadata type fails here with a clear InvalidCastException
            // instead of a NullReferenceException further down.
            var metadata = (GenBankMetadata)seq.ElementAt(0).Metadata[Constants.GenBank];

            // Get CDS qualifier values.
            List<CodingSequence> cdsQualifiers = metadata.Features.CodingSequences;
            List<string> codonStartValue = cdsQualifiers[0].CodonStart;
            List<string> productValue = cdsQualifiers[0].Product;
            List<string> dbReferenceValue = cdsQualifiers[0].DatabaseCrossReference;

            bool checkProductAndCodonStart =
                (0 == string.Compare(methodName, "DNA",
                                     CultureInfo.CurrentCulture, CompareOptions.IgnoreCase))
                || (0 == string.Compare(methodName, "RNA",
                                        CultureInfo.CurrentCulture, CompareOptions.IgnoreCase));

            // Validate CDS qualifiers (expected value first for accurate failure messages).
            Assert.AreEqual(expectedCDSLabel, cdsQualifiers[0].Label);
            Assert.AreEqual(expectedCDSException, cdsQualifiers[0].Exception.ToString(null));

            if (checkProductAndCodonStart)
            {
                Assert.AreEqual(expectedCDSProduct, productValue[0]);
                Assert.AreEqual(expectedCDSCodonStart, codonStartValue[0]);
            }

            Assert.IsTrue(string.IsNullOrEmpty(cdsQualifiers[0].Allele));
            Assert.IsFalse(string.IsNullOrEmpty(cdsQualifiers[0].Citation.ToString()));
            Assert.AreEqual(expectedCDSDBReference, dbReferenceValue[0]);
            Assert.AreEqual(expectedGeneSymbol, cdsQualifiers[0].GeneSymbol);
        }
Exemple #21
0
        /// <summary>
        /// Expects the parameterless Parse call on a freshly created GenBankParser to
        /// throw; the test fails when no exception is raised.
        /// </summary>
        public void TestGenBankFailureWhenParsingEmpty()
        {
            bool exceptionThrown = false;

            try
            {
                ISequenceParser parser = new GenBankParser();
                parser.Parse();
            }
            catch (Exception)
            {
                // all is well with the world
                exceptionThrown = true;
            }

            // Assert.Fail is kept outside the try block so the catch above cannot swallow it.
            if (!exceptionThrown)
            {
                Assert.Fail("Failed to throw exception for calling ParseOne on reader containing empty string.");
            }
        }
        /// <summary>
        ///     Validate Sequence feature of GenBank file: the sub sequence of the first
        ///     feature and its location data are compared with the values from the XML node.
        /// </summary>
        /// <param name="nodeName">xml node name. for different alphabet</param>
        private void ValidateSequenceFeature(string nodeName)
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string subSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSubSequence);
            string subSequenceStart = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SequenceStart);
            string subSequenceEnd = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SequenceEnd);

            // Parse a genBank file and take the first sequence exactly once, so that the
            // metadata and the sub sequence are read from the same ISequence instance.
            ISequenceParser parserObj = new GenBankParser();
            ISequence parsedSequence = parserObj.Parse(filePath).ElementAt(0);

            // Direct cast: a wrong metadata type fails here with a clear InvalidCastException
            // instead of a NullReferenceException on first use.
            var metadata = (GenBankMetadata)parsedSequence.Metadata[Constants.GenBank];
            var firstFeature = metadata.Features.All[0];
            var location = firstFeature.Location;

            // Get Subsequence feature, start and end positions.
            ISequence firstFeatureSeq = firstFeature.GetSubSequence(parsedSequence);
            var sequenceString = new string(firstFeatureSeq.Select(a => (char)a).ToArray());

            // Validate SubSequence (expected value first for accurate failure messages).
            Assert.AreEqual(subSequence, sequenceString);
            Assert.AreEqual(subSequenceStart,
                            location.LocationStart.ToString((IFormatProvider)null));
            Assert.AreEqual(subSequenceEnd,
                            location.LocationEnd.ToString((IFormatProvider)null));
            Assert.IsNull(location.Accession);
            Assert.AreEqual(subSequenceStart, location.StartData);
            Assert.AreEqual(subSequenceEnd, location.EndData);
        }
Exemple #23
0
        /// <summary>
        /// Parses the multi-sequence GenBank file and checks the number of sequences
        /// returned and the length of each one.
        /// </summary>
        public void TestGenBankWhenParsingMultiple()
        {
            // parse; materialise once so the result is not re-enumerated for every
            // Count/ElementAt style access below.
            ISequenceParser parser = new GenBankParser();
            List<ISequence> seqList = parser.Parse(_multipleSeqGenBankFilename).ToList();

            // Just check the number of items returned and that they're not empty.  The guts
            // are tested in TestGenBankWhenParsingOne.
            Assert.AreEqual(2, seqList.Count);
            Assert.AreEqual(105, seqList[0].Count);
            Assert.AreEqual(5028, seqList[1].Count);
        }
        /// <summary>
        ///     Reads every feature collection exposed by the GenBank metadata of the first
        ///     parsed sequence and validates each collection's size against the counts
        ///     stored under the DNA standard features XML node.
        /// </summary>
        public void ValidateGenBankFeatureProperties()
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.FilePathNode);
            string mRNAFeatureCount = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.mRNACount);
            string exonFeatureCount = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.ExonCount);
            string intronFeatureCount = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.IntronCount);
            string cdsFeatureCount = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.CDSCount);
            string allFeaturesCount = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.GenBankFeaturesCount);
            string genesCount = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.GeneCount);
            string miscFeaturesCount = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.MiscFeatureCount);
            string rRNACount = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.rRNACount);
            string tRNACount = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.tRNACount);
            string zeroValue = utilityObj.xmlUtil.GetTextValue(
                Constants.DNAStandardFeaturesKeyNode, Constants.emptyCount);

            ISequenceParser parserObj = new GenBankParser();
            IEnumerable<ISequence> seq = parserObj.Parse(filePath);

            // Get all metadata features. Hitting all the properties in the metadata feature.
            var metadata = (GenBankMetadata) seq.ElementAt(0).Metadata[Constants.GenBank];
            var features = metadata.Features;
            List<FeatureItem> allFeatures = features.All;
            List<Minus10Signal> minus10Signal = features.Minus10Signals;
            List<Minus35Signal> minus35Signal = features.Minus35Signals;
            List<ThreePrimeUtr> threePrimeUTR = features.ThreePrimeUTRs;
            List<FivePrimeUtr> fivePrimeUTR = features.FivePrimeUTRs;
            List<Attenuator> attenuator = features.Attenuators;
            List<CaatSignal> caatSignal = features.CAATSignals;
            List<CodingSequence> codingSequences = features.CodingSequences;
            List<DisplacementLoop> displacementLoop = features.DisplacementLoops;
            List<Enhancer> enhancer = features.Enhancers;
            List<Exon> exonList = features.Exons;
            List<GcSingal> gcsSignal = features.GCSignals;
            List<Gene> genesList = features.Genes;
            List<InterveningDna> interveningDNA = features.InterveningDNAs;
            List<Intron> intronList = features.Introns;
            List<LongTerminalRepeat> longTerminalRepeats = features.LongTerminalRepeats;
            List<MaturePeptide> matPeptide = features.MaturePeptides;
            List<MiscBinding> miscBinding = features.MiscBindings;
            List<MiscDifference> miscDifference = features.MiscDifferences;
            List<MiscFeature> miscFeatures = features.MiscFeatures;
            List<MiscRecombination> miscRecombination = features.MiscRecombinations;
            List<MiscRna> miscRNA = features.MiscRNAs;
            List<MiscSignal> miscSignal = features.MiscSignals;
            List<MiscStructure> miscStructure = features.MiscStructures;
            List<ModifiedBase> modifierBase = features.ModifiedBases;
            List<MessengerRna> mRNA = features.MessengerRNAs;
            List<NonCodingRna> nonCodingRNA = features.NonCodingRNAs;
            List<OperonRegion> operonRegion = features.OperonRegions;
            List<PolyASignal> polySignal = features.PolyASignals;
            List<PolyASite> polySites = features.PolyASites;
            List<PrecursorRna> precursorRNA = features.PrecursorRNAs;
            List<ProteinBindingSite> proteinBindingSites = features.ProteinBindingSites;
            List<RibosomeBindingSite> rBindingSites = features.RibosomeBindingSites;
            List<ReplicationOrigin> repliconOrigin = features.ReplicationOrigins;
            List<RepeatRegion> repeatRegion = features.RepeatRegions;
            List<RibosomalRna> rRNA = features.RibosomalRNAs;
            List<SignalPeptide> signalPeptide = features.SignalPeptides;
            List<StemLoop> stemLoop = features.StemLoops;
            List<TataSignal> tataSignals = features.TATASignals;
            List<Terminator> terminator = features.Terminators;
            List<TransferMessengerRna> tmRNA = features.TransferMessengerRNAs;
            List<TransitPeptide> transitPeptide = features.TransitPeptides;
            List<TransferRna> tRNA = features.TransferRNAs;
            List<UnsureSequenceRegion> unsureRegion = features.UnsureSequenceRegions;
            List<Variation> variations = features.Variations;

            // The "empty" count is the same for every absent feature type: convert it once.
            int expectedZero = Convert.ToInt32(zeroValue, null);

            // Validate GenBank Features (expected value first for accurate failure messages).
            // Counts read as text from the XML are compared as text, exactly as configured.
            Assert.AreEqual(expectedZero, minus10Signal.Count);
            Assert.AreEqual(expectedZero, minus35Signal.Count);
            Assert.AreEqual(expectedZero, threePrimeUTR.Count);
            Assert.AreEqual(expectedZero, fivePrimeUTR.Count);
            Assert.AreEqual(expectedZero, caatSignal.Count);
            Assert.AreEqual(expectedZero, attenuator.Count);
            Assert.AreEqual(expectedZero, displacementLoop.Count);

            Assert.AreEqual(expectedZero, enhancer.Count);
            Assert.AreEqual(expectedZero, gcsSignal.Count);
            Assert.AreEqual(genesCount, genesList.Count.ToString((IFormatProvider) null));
            Assert.AreEqual(expectedZero, interveningDNA.Count);
            Assert.AreEqual(expectedZero, longTerminalRepeats.Count);
            Assert.AreEqual(expectedZero, matPeptide.Count);
            Assert.AreEqual(expectedZero, miscBinding.Count);

            Assert.AreEqual(expectedZero, miscDifference.Count);
            Assert.AreEqual(miscFeaturesCount, miscFeatures.Count.ToString((IFormatProvider) null));
            Assert.AreEqual(expectedZero, miscRecombination.Count);
            Assert.AreEqual(expectedZero, miscSignal.Count);
            Assert.AreEqual(expectedZero, modifierBase.Count);
            Assert.AreEqual(expectedZero, miscRNA.Count);
            Assert.AreEqual(expectedZero, miscStructure.Count);
            Assert.AreEqual(mRNAFeatureCount, mRNA.Count.ToString((IFormatProvider) null));
            Assert.AreEqual(expectedZero, nonCodingRNA.Count);
            Assert.AreEqual(expectedZero, operonRegion.Count);

            Assert.AreEqual(expectedZero, polySignal.Count);
            Assert.AreEqual(expectedZero, polySites.Count);
            Assert.AreEqual(expectedZero, precursorRNA.Count);
            Assert.AreEqual(expectedZero, proteinBindingSites.Count);
            Assert.AreEqual(expectedZero, rBindingSites.Count);
            Assert.AreEqual(expectedZero, repliconOrigin.Count);

            Assert.AreEqual(rRNACount, rRNA.Count.ToString((IFormatProvider) null));
            Assert.AreEqual(expectedZero, signalPeptide.Count);
            Assert.AreEqual(expectedZero, stemLoop.Count);
            Assert.AreEqual(expectedZero, tataSignals.Count);
            Assert.AreEqual(expectedZero, repeatRegion.Count);
            Assert.AreEqual(expectedZero, terminator.Count);
            Assert.AreEqual(expectedZero, tmRNA.Count);
            Assert.AreEqual(expectedZero, variations.Count);

            Assert.AreEqual(tRNACount, tRNA.Count.ToString((IFormatProvider) null));
            Assert.AreEqual(expectedZero, transitPeptide.Count);
            Assert.AreEqual(expectedZero, unsureRegion.Count);

            Assert.AreEqual(Convert.ToInt32(allFeaturesCount, null), allFeatures.Count);
            Assert.AreEqual(Convert.ToInt32(cdsFeatureCount, null), codingSequences.Count);
            Assert.AreEqual(Convert.ToInt32(exonFeatureCount, null), exonList.Count);
            Assert.AreEqual(Convert.ToInt32(intronFeatureCount, null), intronList.Count);
        }
Exemple #25
0
 /// <summary>
 /// Sets the protein alphabet on the parser, parses the single DNA sequence file
 /// and expects reading the first sequence to raise an InvalidDataException.
 /// </summary>
 public void TestGenBankWhenUserSetsIncorrectAlphabet()
 {
     ISequenceParser parser = new GenBankParser();
     parser.Alphabet = Alphabets.Protein;

     bool invalidDataReported = false;
     try
     {
         // ElementAt forces the parse result to be enumerated.
         var parsed = parser.Parse(_singleDnaSeqGenBankFilename);
         var firstSequence = parsed.ElementAt(0);
     }
     catch (InvalidDataException)
     {
         // all is well with the world
         invalidDataReported = true;
     }

     if (!invalidDataReported)
     {
         Assert.Fail("Failed to throw exception for trying to create sequence using incorrect alphabet.");
     }
 }
        /// <summary>
        ///     Validates the sub sequence of the first GenBank feature when it is resolved
        ///     with a dictionary of referenced sequences, together with the feature's
        ///     location data, against the values stored in the XML configuration.
        /// </summary>
        public void ValidateSequenceFeatureUsingReferencedSequence()
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                Constants.GenBankFileSubSequenceNode, Constants.FilePathNode);
            string subSequence = utilityObj.xmlUtil.GetTextValue(
                Constants.GenBankFileSubSequenceNode, Constants.ExpectedSubSequence);
            string subSequenceStart = utilityObj.xmlUtil.GetTextValue(
                Constants.GenBankFileSubSequenceNode, Constants.SequenceStart);
            string subSequenceEnd = utilityObj.xmlUtil.GetTextValue(
                Constants.GenBankFileSubSequenceNode, Constants.SequenceEnd);
            string referenceSeq = utilityObj.xmlUtil.GetTextValue(
                Constants.GenBankFileSubSequenceNode, Constants.referenceSeq);

            // Parse a genBank file.
            var refSequence = new Sequence(Alphabets.RNA, referenceSeq);
            var parserObj = new GenBankParser();

            // First() reports an empty parse result directly instead of letting a null
            // reference surface on the metadata access below.
            ISequence sequence = parserObj.Parse(filePath).First();

            // Direct cast: a wrong metadata type fails here with a clear InvalidCastException.
            var metadata = (GenBankMetadata)sequence.Metadata[Constants.GenBank];
            var firstFeature = metadata.Features.All[0];
            var location = firstFeature.Location;

            // Get Subsequence feature, start and end positions.
            var referenceSequences = new Dictionary<string, ISequence>
            {
                { Constants.Reference, refSequence }
            };
            ISequence firstFeatureSeq = firstFeature.GetSubSequence(sequence, referenceSequences);

            var sequenceString = new string(firstFeatureSeq.Select(a => (char) a).ToArray());

            // Validate SubSequence (expected value first for accurate failure messages).
            Assert.AreEqual(subSequence, sequenceString);
            Assert.AreEqual(subSequenceStart,
                            location.LocationStart.ToString((IFormatProvider) null));
            Assert.AreEqual(subSequenceEnd,
                            location.LocationEnd.ToString((IFormatProvider) null));
            Assert.IsNull(location.Accession);
            Assert.AreEqual(subSequenceStart, location.StartData);
            Assert.AreEqual(subSequenceEnd, location.EndData);

            // Log to VSTest GUI
            ApplicationLog.WriteLine(string.Format(null,
                                                   "GenBank Features BVT: Successfully validated the Subsequence feature '{0}'",
                                                   sequenceString));
            ApplicationLog.WriteLine(string.Format(null,
                                                   "GenBank Features BVT: Successfully validated the start of subsequence'{0}'",
                                                   location.LocationStart.ToString(
                                                       (IFormatProvider) null)));
        }
Exemple #27
0
        /// <summary>
        /// Checks that the Name, Description and SupportedFileTypes properties of the
        /// GenBank parser and formatter match the corresponding resource strings.
        /// </summary>
        public void GenBankProperties()
        {
            // Expected value first so that failure messages label the values correctly.
            ISequenceParser parser = new GenBankParser();

            Assert.AreEqual(Resource.GENBANK_NAME, parser.Name);
            Assert.AreEqual(Resource.GENBANKPARSER_DESCRIPTION, parser.Description);
            Assert.AreEqual(Resource.GENBANK_FILEEXTENSION, parser.SupportedFileTypes);

            ISequenceFormatter formatter = new GenBankFormatter();

            Assert.AreEqual(Resource.GENBANK_NAME, formatter.Name);
            Assert.AreEqual(Resource.GENBANKFORMATTER_DESCRIPTION, formatter.Description);
            Assert.AreEqual(Resource.GENBANK_FILEEXTENSION, formatter.SupportedFileTypes);
        }
        /// <summary>
        ///     Validate GenBank features: a sequence built from the expected sequence text and
        ///     the parsed metadata is formatted to a temporary file, parsed back, and its
        ///     feature counts and feature keys are compared with the XML configuration.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="methodName">
        ///     Name of the method. "DNA" or "RNA" (case-insensitive) validates the source,
        ///     mRNA, CDS, exon and intron keys; any other value validates the source,
        ///     protein and CDS keys.
        /// </param>
        private void ValidateGenBankFeatures(string nodeName,
                                             string methodName)
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string alphabetName = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.AlphabetNameNode);
            string expectedSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequenceNode);
            string mRNAFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.mRNACount);
            string exonFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExonCount);
            string intronFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.IntronCount);
            string cdsFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSCount);
            string allFeaturesCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.GenBankFeaturesCount);
            string expectedCDSKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSKey);
            string expectedIntronKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.IntronKey);
            string expectedExonKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExonKey);
            string mRNAKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.mRNAKey);
            string sourceKeyName = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SourceKey);
            string proteinKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ProteinKeyName);

            string tempFileName = Path.GetTempFileName();
            try
            {
                // Materialise the parse result once instead of re-enumerating it for
                // every Count/ElementAt access.
                ISequenceParser parserObj = new GenBankParser();
                List<ISequence> sequenceList = parserObj.Parse(filePath).ToList();

                // A single-sequence file uses its only sequence; otherwise the second
                // sequence supplies the ID and metadata.
                ISequence sourceSequence = sequenceList[sequenceList.Count == 1 ? 0 : 1];

                string expectedUpdatedSequence =
                    expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq =
                    new Sequence(Utility.GetAlphabet(alphabetName), expectedUpdatedSequence)
                    {
                        ID = sourceSequence.ID
                    };
                orgSeq.Metadata.Add(Constants.GenBank,
                                    sourceSequence.Metadata[Constants.GenBank]);

                ISequenceFormatter formatterObj = new GenBankFormatter();
                formatterObj.Format(orgSeq, tempFileName);

                // parse a temporary file.
                var tempParserObj = new GenBankParser();
                ISequence sequence = tempParserObj.Parse(tempFileName).ElementAt(0);

                var metadata = (GenBankMetadata) sequence.Metadata[Constants.GenBank];
                var features = metadata.Features;

                // Validate formatted temporary file GenBank Features
                // (expected value first for accurate failure messages).
                Assert.AreEqual(Convert.ToInt32(allFeaturesCount, null), features.All.Count);
                Assert.AreEqual(Convert.ToInt32(cdsFeatureCount, null), features.CodingSequences.Count);
                Assert.AreEqual(Convert.ToInt32(exonFeatureCount, null), features.Exons.Count);
                Assert.AreEqual(Convert.ToInt32(intronFeatureCount, null), features.Introns.Count);
                Assert.AreEqual(Convert.ToInt32(mRNAFeatureCount, null), features.MessengerRNAs.Count);
                Assert.AreEqual(0, features.Attenuators.Count);
                Assert.AreEqual(0, features.CAATSignals.Count);
                Assert.AreEqual(0, features.DisplacementLoops.Count);
                Assert.AreEqual(0, features.Enhancers.Count);
                Assert.AreEqual(0, features.Genes.Count);

                IList<FeatureItem> featureList = features.All;
                Assert.AreEqual(sourceKeyName, featureList[0].Key.ToString(null));

                if ((0 == string.Compare(methodName, "DNA",
                                         CultureInfo.CurrentCulture, CompareOptions.IgnoreCase))
                    || (0 == string.Compare(methodName, "RNA",
                                            CultureInfo.CurrentCulture, CompareOptions.IgnoreCase)))
                {
                    Assert.AreEqual(mRNAKey, featureList[1].Key.ToString(null));
                    Assert.AreEqual(expectedCDSKey, featureList[3].Key.ToString(null));
                    Assert.AreEqual(expectedExonKey, featureList[5].Key.ToString(null));
                    Assert.AreEqual(expectedIntronKey, featureList[6].Key.ToString(null));
                    ApplicationLog.WriteLine(
                        "GenBank Features BVT: Successfully validated the GenBank Features");
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "GenBank Features BVT: Successfully validated the CDS feature '{0}'",
                                                           featureList[3].Key.ToString(null)));
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "GenBank Features BVT: Successfully validated the Exon feature '{0}'",
                                                           featureList[5].Key.ToString(null)));
                }
                else
                {
                    Assert.AreEqual(proteinKey, featureList[1].Key.ToString(null));
                    Assert.AreEqual(expectedCDSKey, featureList[2].Key.ToString(null));
                    ApplicationLog.WriteLine(
                        "GenBank Features BVT: Successfully validated the GenBank Features");
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "GenBank Features BVT: Successfully validated the CDS feature '{0}'",
                                                           featureList[2].Key.ToString(null)));
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "GenBank Features BVT: Successfully validated the Source feature '{0}'",
                                                           featureList[0].Key.ToString(null)));
                }
            }
            finally
            {
                // Remove the temporary file even when parsing, formatting or an assertion fails.
                File.Delete(tempFileName);
            }
        }
Exemple #29
0
 public void TestParsingREFSEQPrimaryHeader()
 {
     // Parses a file whose PRIMARY header holds a table that carries its own
     // header row:
     // REFSEQ_SPAN         PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP
     // There are no assertions; the test passes when parsing and fully
     // enumerating the result (ToList) completes without throwing.
     var parser = new GenBankParser();
     var parsedSequences = parser.Parse(_genBankFile_WithREFSEQPrimaryData).ToList();
 }
        /// <summary>
        ///     Validate addition of GenBank features.
        ///     Parses the configured GenBank file, formats its first sequence's
        ///     GenBank metadata together with the expected sequence into a temporary
        ///     file, parses that file back, replaces the feature list with two newly
        ///     built features and verifies their keys and locations.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateAdditionGenBankFeatures(string nodeName)
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string alphabetName = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.AlphabetNameNode);
            string expectedSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequenceNode);
            string addFirstKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstKey);
            string addSecondKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SecondKey);
            string addFirstLocation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstLocation);
            string addSecondLocation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SecondLocation);
            string addFirstQualifier = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstQualifier);
            string addSecondQualifier = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SecondQualifier);

            ISequenceParser parser1 = new GenBankParser();
            IEnumerable<ISequence> seqList1 = parser1.Parse(filePath);
            var localBuilderObj = new LocationBuilder();

            string tempFileName = Path.GetTempFileName();
            try
            {
                // Strip line breaks and blanks so the expected text is one contiguous sequence.
                string expectedUpdatedSequence =
                    expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq = new Sequence(Utility.GetAlphabet(alphabetName),
                                          expectedUpdatedSequence);

                // Fetch the first parsed sequence once instead of enumerating
                // the parse result separately for the ID and for the metadata.
                ISequence firstSeq = seqList1.ElementAt(0);
                orgSeq.ID = firstSeq.ID;
                orgSeq.Metadata.Add(Constants.GenBank,
                                    firstSeq.Metadata[Constants.GenBank]);

                ISequenceFormatter formatterObj = new GenBankFormatter();
                formatterObj.Format(orgSeq, tempFileName);

                // parse GenBank file.
                var parserObj = new GenBankParser();
                try
                {
                    IEnumerable<ISequence> seqList = parserObj.Parse(tempFileName);

                    ISequence seq = seqList.ElementAt(0);
                    var metadata = (GenBankMetadata) seq.Metadata[Constants.GenBank];

                    // Add a new features to Genbank features list.
                    metadata.Features = new SequenceFeatures();
                    var feature = new FeatureItem(addFirstKey, addFirstLocation);
                    var qualifierValues = new List<string>();
                    qualifierValues.Add(addFirstQualifier);
                    qualifierValues.Add(addFirstQualifier);
                    feature.Qualifiers.Add(addFirstQualifier, qualifierValues);
                    metadata.Features.All.Add(feature);

                    feature = new FeatureItem(addSecondKey, addSecondLocation);
                    qualifierValues = new List<string>();
                    qualifierValues.Add(addSecondQualifier);
                    qualifierValues.Add(addSecondQualifier);
                    feature.Qualifiers.Add(addSecondQualifier, qualifierValues);
                    metadata.Features.All.Add(feature);

                    // Validate added GenBank features.
                    Assert.AreEqual(metadata.Features.All[0].Key.ToString(null), addFirstKey);
                    Assert.AreEqual(
                        localBuilderObj.GetLocationString(metadata.Features.All[0].Location),
                        addFirstLocation);
                    Assert.AreEqual(metadata.Features.All[1].Key.ToString(null), addSecondKey);
                    Assert.AreEqual(localBuilderObj.GetLocationString(metadata.Features.All[1].Location),
                                    addSecondLocation);
                }
                finally
                {
                    // Close the parser even when an assertion above fails, so the
                    // temporary file can be deleted afterwards.
                    parserObj.Close();
                }
            }
            finally
            {
                // Always remove the temporary file; previously a failing assertion
                // left it behind in the temp directory.
                File.Delete(tempFileName);
            }
        }
        /// <summary>
        ///     Validate GenBank standard features key.
        ///     Parses the configured file and compares the feature counts of its first
        ///     sequence, plus the standard feature key names, with the expected values
        ///     read from the XML node.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="methodName">
        ///     Name of the method. "DNA" or "RNA" (compared ignoring case) runs the
        ///     full set of checks; any other value only checks the coding sequence
        ///     count and the CDS key.
        /// </param>
        private void ValidateStandardFeaturesKey(string nodeName, string methodName)
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string expectedCondingSeqCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSCount);
            string exonFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExonCount);
            string expectedtRNA = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.tRNACount);
            string expectedGeneCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.GeneCount);
            string miscFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.MiscFeatureCount);
            string expectedCDSKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSKey);
            string expectedIntronKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.IntronKey);
            string mRNAKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.mRNAKey);
            string allFeaturesCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.StandardFeaturesCount);

            // Parse a file.
            ISequenceParser parserObj = new GenBankParser();
            IEnumerable<ISequence> seq = parserObj.Parse(filePath);

            // Direct cast instead of 'as': an entry of the wrong type now fails with
            // an InvalidCastException right here rather than a NullReferenceException
            // on the first use of 'metadata' further down.
            var metadata =
                (GenBankMetadata) seq.ElementAt(0).Metadata[Constants.GenBank];
            Assert.IsNotNull(metadata);

            bool isNucleotide =
                string.Equals(methodName, "DNA", StringComparison.CurrentCultureIgnoreCase)
                || string.Equals(methodName, "RNA", StringComparison.CurrentCultureIgnoreCase);

            if (isNucleotide)
            {
                // Validate standard features keys.
                Assert.AreEqual(metadata.Features.CodingSequences.Count.ToString((IFormatProvider) null),
                                expectedCondingSeqCount);
                Assert.AreEqual(metadata.Features.Exons.Count.ToString((IFormatProvider) null),
                                exonFeatureCount);
                Assert.AreEqual(metadata.Features.TransferRNAs.Count.ToString((IFormatProvider) null),
                                expectedtRNA);
                Assert.AreEqual(metadata.Features.Genes.Count.ToString((IFormatProvider) null),
                                expectedGeneCount);
                Assert.AreEqual(metadata.Features.MiscFeatures.Count.ToString((IFormatProvider) null),
                                miscFeatureCount);
                Assert.AreEqual(StandardFeatureKeys.CodingSequence.ToString(null),
                                expectedCDSKey);
                Assert.AreEqual(StandardFeatureKeys.Intron.ToString(null),
                                expectedIntronKey);
                Assert.AreEqual(StandardFeatureKeys.MessengerRna.ToString(null),
                                mRNAKey);
                Assert.AreEqual(StandardFeatureKeys.All.Count.ToString((IFormatProvider) null),
                                allFeaturesCount);
            }
            else
            {
                // Any other method name: only the coding sequence checks apply.
                Assert.AreEqual(metadata.Features.CodingSequences.Count.ToString((IFormatProvider) null),
                                expectedCondingSeqCount);
                Assert.AreEqual(StandardFeatureKeys.CodingSequence.ToString(null),
                                expectedCDSKey);
            }
        }
Exemple #32
0
        /// <summary>
        /// Validates GenBank Parser for specific test cases.
        /// Parses the file at FilePath and checks the first sequence's alphabet and ID,
        /// its locus metadata (strand, topology, division code, date), its version
        /// information (skipped when AlphabetName is "rna") and finally the sequence
        /// content against ExpectedSequence.
        /// </summary>
        private static void ValidateParserSpecialTestCases()
        {
            ISequenceParser parserObj = new GenBankParser();

            Assert.IsTrue(File.Exists(FilePath));
            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format("GenBank Parser : File Exists in the Path '{0}'.",
                FilePath));
            IEnumerable<ISequence> seqList = parserObj.Parse(FilePath);
            ISequence seq = seqList.ElementAt(0);
            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
            Assert.AreEqual(SeqId, seq.ID);

            ApplicationLog.WriteLine(
                "GenBank Parser : Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
            if (metadata.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
            }

            // Guard on the topology itself. The previous code repeated the Strand
            // check here, so the topology assertion was tied to the wrong field.
            if (metadata.Locus.StrandTopology != SequenceStrandTopology.None)
            {
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                    metadata.Locus.StrandTopology.ToString().ToUpper(
                    CultureInfo.CurrentCulture));
            }
            if (metadata.Locus.DivisionCode != SequenceDivisionCode.None)
            {
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
            }
            Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);

            // Version and GI number are only validated for non-RNA alphabets.
            if (0 != string.Compare(AlphabetName, "rna",
                CultureInfo.CurrentCulture, CompareOptions.IgnoreCase))
            {
                Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");
            }
            else
            {
                ApplicationLog.WriteLine(
                    "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date Properties");
            }

            // Replace all the empty spaces, paragraphs and new line for validation
            string updatedExpSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "").ToUpper(
                CultureInfo.CurrentCulture);
            string updatedActualSequence =
                new string(seq.Select(a => (char)a).ToArray()).Replace("\r", "").Replace("\n", "").Replace(" ", "").ToUpper(
                CultureInfo.CurrentCulture);

            Assert.AreEqual(updatedExpSequence, updatedActualSequence);
            ApplicationLog.WriteLine(
                "GenBank Parser : Successfully validated the Sequence");
        }