Esempio n. 1
0
        public void GenBankProperties()
        {
            ISequenceParser parser = new GenBankParser();

            Assert.AreEqual(parser.Name, Resource.GENBANK_NAME);
            Assert.AreEqual(parser.Description, Resource.GENBANKPARSER_DESCRIPTION);
            Assert.AreEqual(parser.SupportedFileTypes, Resource.GENBANK_FILEEXTENSION);

            ISequenceFormatter formatter = new GenBankFormatter();

            Assert.AreEqual(formatter.Name, Resource.GENBANK_NAME);
            Assert.AreEqual(formatter.Description, Resource.GENBANKFORMATTER_DESCRIPTION);
            Assert.AreEqual(formatter.SupportedFileTypes, Resource.GENBANK_FILEEXTENSION);
        }
Esempio n. 2
0
 public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
 {
     // Create a Sequence with all attributes.
     // parse and update the properties instead of parsing entire file.   
     string tempFileName = Path.GetTempFileName();
     ISequenceParser parser1 = new GenBankParser();
     using (parser1.Open(_genBankFile_WithMultipleDBLines))
     {
         var orgSeq = parser1.Parse().First();
         ISequenceFormatter formatter = new GenBankFormatter();
         using (formatter.Open(tempFileName))
         {
             formatter.Format(orgSeq);
             formatter.Close();
         }
     }
     var same = CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);
     File.Delete(tempFileName);
     Assert.IsTrue(same);
     ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
 }
Esempio n. 3
0
        public void TestGenBankWhenUserSetsDnaAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser();
            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.Parse(_singleDnaSeqGenBankFilename).FirstOrDefault();
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            formatter.Format(seq, TempGenBankFileName);

            string actual = string.Empty;
            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }
            File.Delete(TempGenBankFileName);

            // test the formatting
            Assert.AreEqual(_singleDnaSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine), actual.Replace(" ", ""));
        }
Esempio n. 4
0
        public void TestGenBankForManyFiles()
        {
            // parser and formatter will be used for all files in input dir

            // iterate through the files in input dir, parsing and formatting each; write results
            // to log file
            DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);
            foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);

                IEnumerable<ISequence> seqList = new GenBankParser().Parse(fileInfo.FullName);

                ISequenceFormatter formatter = new GenBankFormatter();
                using (formatter.Open(TempGenBankFileName))
                {
                    (formatter as GenBankFormatter).Format(seqList.ToList());
                }

                using (var reader = new StreamReader(TempGenBankFileName))
                {
                    string actual = reader.ReadToEnd();
                }
                
                File.Delete(TempGenBankFileName);
            }
        }
        /// <summary>
        ///     Validate addition of GenBank features.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        private void ValidateAdditionGenBankFeatures(string nodeName)
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string alphabetName = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.AlphabetNameNode);
            string expectedSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequenceNode);
            string addFirstKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstKey);
            string addSecondKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SecondKey);
            string addFirstLocation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstLocation);
            string addSecondLocation = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SecondLocation);
            string addFirstQualifier = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FirstQualifier);
            string addSecondQualifier = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SecondQualifier);

            ISequenceParser parser1 = new GenBankParser();
            {
                IEnumerable<ISequence> seqList1 = parser1.Parse(filePath);
                var localBuilderObj = new LocationBuilder();

                string tempFileName = Path.GetTempFileName();
                string expectedUpdatedSequence =
                    expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq = new Sequence(Utility.GetAlphabet(alphabetName),
                                          expectedUpdatedSequence);
                orgSeq.ID = seqList1.ElementAt(0).ID;

                orgSeq.Metadata.Add(Constants.GenBank,
                                    seqList1.ElementAt(0).Metadata[Constants.GenBank]);

                ISequenceFormatter formatterObj = new GenBankFormatter();
                {
                    formatterObj.Format(orgSeq, tempFileName);

                    // parse GenBank file.
                    var parserObj = new GenBankParser();
                    {
                        IEnumerable<ISequence> seqList = parserObj.Parse(tempFileName);

                        ISequence seq = seqList.ElementAt(0);
                        var metadata = (GenBankMetadata) seq.Metadata[Constants.GenBank];

                        // Add a new features to Genbank features list.
                        metadata.Features = new SequenceFeatures();
                        var feature = new FeatureItem(addFirstKey, addFirstLocation);
                        var qualifierValues = new List<string>();
                        qualifierValues.Add(addFirstQualifier);
                        qualifierValues.Add(addFirstQualifier);
                        feature.Qualifiers.Add(addFirstQualifier, qualifierValues);
                        metadata.Features.All.Add(feature);

                        feature = new FeatureItem(addSecondKey, addSecondLocation);
                        qualifierValues = new List<string>();
                        qualifierValues.Add(addSecondQualifier);
                        qualifierValues.Add(addSecondQualifier);
                        feature.Qualifiers.Add(addSecondQualifier, qualifierValues);
                        metadata.Features.All.Add(feature);

                        // Validate added GenBank features.
                        Assert.AreEqual(metadata.Features.All[0].Key.ToString(null), addFirstKey);
                        Assert.AreEqual(
                            localBuilderObj.GetLocationString(metadata.Features.All[0].Location),
                            addFirstLocation);
                        Assert.AreEqual(metadata.Features.All[1].Key.ToString(null), addSecondKey);
                        Assert.AreEqual(localBuilderObj.GetLocationString(metadata.Features.All[1].Location),
                                        addSecondLocation);

                        parserObj.Close();
                    }

                    File.Delete(tempFileName);
                }
            }
        }
Esempio n. 6
0
        public void TestGenBankWhenParsingOne()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence seq = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

            // test the non-metadata properties
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
            Assert.AreEqual("SCU49845", seq.ID);

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            Assert.AreEqual(metadata.Locus.Strand, SequenceStrandType.None);
            Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLower(CultureInfo.CurrentCulture));
            Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse("21-JUN-1999", (IFormatProvider)null), metadata.Locus.Date);
            Assert.AreEqual("1", metadata.Version.Version);
            Assert.AreEqual("1293613", metadata.Version.GiNumber);

            // test that we're correctly putting all types of metadata in the right places
            Assert.AreEqual(1, seq.Metadata.Count);
            IList<CitationReference> referenceList = metadata.References;
            Assert.AreEqual(3, referenceList.Count);
            IList<FeatureItem> featureList = metadata.Features.All;
            Assert.AreEqual(6, featureList.Count);
            Assert.AreEqual(4, featureList[0].Qualifiers.Count);
            Assert.AreEqual(5, featureList[1].Qualifiers.Count);
            Assert.AreEqual(1, featureList[2].Qualifiers.Count);

            // test the sequence string
            string expected = @"gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc";
            Assert.AreEqual(expected, new string(seq.Select(a => (char)a).ToArray()));

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            formatter.Format(seq, TempGenBankFileName);

            string actual = string.Empty;
            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }
            File.Delete(TempGenBankFileName);

            // test the formatting
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine), actual.Replace(" ", ""));
        }
        /// <summary>
        ///     Validate GenBank features.
        /// </summary>
        /// <param name="nodeName">xml node name.</param>
        /// <param name="methodName">Name of the method</param>
        private void ValidateGenBankFeatures(string nodeName,
                                             string methodName)
        {
            // Get Values from XML node.
            string filePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string alphabetName = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.AlphabetNameNode);
            string expectedSequence = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExpectedSequenceNode);
            string mRNAFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.mRNACount);
            string exonFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExonCount);
            string intronFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.IntronCount);
            string cdsFeatureCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSCount);
            string allFeaturesCount = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.GenBankFeaturesCount);
            string expectedCDSKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.CDSKey);
            string expectedIntronKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.IntronKey);
            string expectedExonKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ExonKey);
            string mRNAKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.mRNAKey);
            string sourceKeyName = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.SourceKey);
            string proteinKey = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.ProteinKeyName);
            string tempFileName = Path.GetTempFileName();
            ISequenceParser parserObj = new GenBankParser();
            IEnumerable<ISequence> sequenceList = parserObj.Parse(filePath);

            if (sequenceList.Count() == 1)
            {
                string expectedUpdatedSequence =
                    expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq = new Sequence(Utility.GetAlphabet(alphabetName), expectedUpdatedSequence);
                orgSeq.ID = sequenceList.ElementAt(0).ID;

                orgSeq.Metadata.Add(Constants.GenBank,
                                    sequenceList.ElementAt(0).Metadata[Constants.GenBank]);

                ISequenceFormatter formatterObj = new GenBankFormatter();
                formatterObj.Format(orgSeq, tempFileName);
            }
            else
            {
                string expectedUpdatedSequence =
                    expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq =
                    new Sequence(Utility.GetAlphabet(alphabetName), expectedUpdatedSequence)
                    {
                        ID = sequenceList.ElementAt(1).ID
                    };

                orgSeq.Metadata.Add(Constants.GenBank,
                                    sequenceList.ElementAt(1).Metadata[Constants.GenBank]);
                ISequenceFormatter formatterObj = new GenBankFormatter();
                formatterObj.Format(orgSeq, tempFileName);
            }

            // parse a temporary file.
            var tempParserObj = new GenBankParser();
            {
                IEnumerable<ISequence> tempFileSeqList = tempParserObj.Parse(tempFileName);
                ISequence sequence = tempFileSeqList.ElementAt(0);

                var metadata = (GenBankMetadata) sequence.Metadata[Constants.GenBank];

                // Validate formatted temporary file GenBank Features.
                Assert.AreEqual(metadata.Features.All.Count,
                                Convert.ToInt32(allFeaturesCount, null));
                Assert.AreEqual(metadata.Features.CodingSequences.Count,
                                Convert.ToInt32(cdsFeatureCount, null));
                Assert.AreEqual(metadata.Features.Exons.Count,
                                Convert.ToInt32(exonFeatureCount, null));
                Assert.AreEqual(metadata.Features.Introns.Count,
                                Convert.ToInt32(intronFeatureCount, null));
                Assert.AreEqual(metadata.Features.MessengerRNAs.Count,
                                Convert.ToInt32(mRNAFeatureCount, null));
                Assert.AreEqual(metadata.Features.Attenuators.Count, 0);
                Assert.AreEqual(metadata.Features.CAATSignals.Count, 0);
                Assert.AreEqual(metadata.Features.DisplacementLoops.Count, 0);
                Assert.AreEqual(metadata.Features.Enhancers.Count, 0);
                Assert.AreEqual(metadata.Features.Genes.Count, 0);

                if ((0 == string.Compare(methodName, "DNA",
                                         CultureInfo.CurrentCulture, CompareOptions.IgnoreCase))
                    || (0 == string.Compare(methodName, "RNA",
                                            CultureInfo.CurrentCulture, CompareOptions.IgnoreCase)))
                {
                    IList<FeatureItem> featureList = metadata.Features.All;
                    Assert.AreEqual(featureList[0].Key.ToString(null), sourceKeyName);
                    Assert.AreEqual(featureList[1].Key.ToString(null), mRNAKey);
                    Assert.AreEqual(featureList[3].Key.ToString(null), expectedCDSKey);
                    Assert.AreEqual(featureList[5].Key.ToString(null), expectedExonKey);
                    Assert.AreEqual(featureList[6].Key.ToString(null), expectedIntronKey);
                    ApplicationLog.WriteLine(
                        "GenBank Features BVT: Successfully validated the GenBank Features");
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "GenBank Features BVT: Successfully validated the CDS feature '{0}'",
                                                           featureList[3].Key.ToString(null)));
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "GenBank Features BVT: Successfully validated the Exon feature '{0}'",
                                                           featureList[5].Key.ToString(null)));
                }
                else
                {
                    IList<FeatureItem> proFeatureList = metadata.Features.All;
                    Assert.AreEqual(proFeatureList[0].Key.ToString(null), sourceKeyName);
                    Assert.AreEqual(proFeatureList[1].Key.ToString(null), proteinKey);
                    Assert.AreEqual(proFeatureList[2].Key.ToString(null), expectedCDSKey);
                    ApplicationLog.WriteLine(
                        "GenBank Features BVT: Successfully validated the GenBank Features");
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "GenBank Features BVT: Successfully validated the CDS feature '{0}'",
                                                           proFeatureList[2].Key.ToString(null)));
                    ApplicationLog.WriteLine(string.Format(null,
                                                           "GenBank Features BVT: Successfully validated the Source feature '{0}'",
                                                           proFeatureList[0].Key.ToString(null)));
                }
            }
            File.Delete(tempFileName);
        }
Esempio n. 8
0
        public void GenBankFormatterValidateWriteUsingStream()
        {
            InitializeXmlVariables();

            // Create a Sequence with all attributes.
            // Parse and update the properties instead of parsing entire file.            
            ISequenceParser parser1 = new GenBankParser();
            {
                IEnumerable<ISequence> seqList1 = parser1.Parse(FilePath);
                string tempFileName = Path.GetTempFileName();
                GenBankMetadata metadata = null;
                ISequence seq = null;
                string expectedUpdatedSequence =
                    ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq =
                    new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
                orgSeq.Metadata.Add("GenBank",
                                    seqList1.ElementAt(0).Metadata["GenBank"]);
                orgSeq.ID = seqList1.ElementAt(0).ID;

                ISequenceFormatter formatter = new GenBankFormatter();
                {
                    using (formatter.Open(tempFileName))
                    {
                        formatter.Format(orgSeq);
                    }
                }

                var parserObj = new GenBankParser();
                {
                    IEnumerable<ISequence> seqList = parserObj.Parse(tempFileName);
                    seq = seqList.ElementAt(0);
                    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                    Assert.AreEqual(SeqId, seq.ID);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                    // test the metadata that is tricky to parse, and will not be tested implicitly by
                    // testing the formatting 
                    metadata = (GenBankMetadata) seq.Metadata["GenBank"];
                    if (metadata.Locus.Strand != SequenceStrandType.None)
                    {
                        Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                    }
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string            
                Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char) a).ToArray()));
                ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
                File.Delete(tempFileName);
            }
        }
Esempio n. 9
0
        public void GenBankFormatterWithParseValidateWriteFilePath()
        {
            InitializeXmlVariables();
            // parse
            ISequenceParser parserObj = new GenBankParser();
            {
                IEnumerable<ISequence> seqList = parserObj.Parse(FilePath);
                ISequence seq = seqList.ElementAt(0);
                string tempFileName = Path.GetTempFileName();
                ISequenceFormatter formatter = new GenBankFormatter();
                {
                    formatter.Format(seq, tempFileName);

                    // parse
                    ISequenceParser parserObjFromFile = new GenBankParser();
                    seqList = parserObjFromFile.Parse(tempFileName);
                    seq = seqList.ElementAt(0);
                    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                    Assert.AreEqual(SeqId, seq.ID);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                    // test the metadata that is tricky to parse, and will not be tested implicitly by
                    // testing the formatting
                    var metadata =
                        (GenBankMetadata) seq.Metadata["GenBank"];
                    if (metadata.Locus.Strand != SequenceStrandType.None)
                    {
                        Assert.AreEqual(StrandType,
                                        metadata.Locus.Strand.ToString());
                    }
                    Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                    metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                    Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                    Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                    metadata.Locus.Date);
                    Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                    // test the sequence string
                    Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char) a).ToArray()));

                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Sequence");
                    File.Delete(tempFileName);
                }
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Validates GenBank Formatter for General test cases.
        /// </summary>
        /// <param name="seqList">sequence list.</param>
        private static void ValidateWriteGeneralTestCases(IEnumerable<ISequence> seqList1)
        {
            // Create a Sequence with all attributes.
            // Parse and update the properties instead of parsing entire file.
            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
            Sequence orgSeq =
                new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
                orgSeq.Metadata.Add("GenBank",
                    (GenBankMetadata)seqList1.ElementAt(0).Metadata["GenBank"]);
                orgSeq.ID = seqList1.ElementAt(0).ID;
                string tempFileName = System.IO.Path.GetTempFileName();
                ISequenceFormatter formatter = new GenBankFormatter();
                formatter.Format(orgSeq, tempFileName);

                // parse
                GenBankParser parserObj = new GenBankParser();
                IEnumerable<ISequence> seqList = parserObj.Parse(tempFileName);
                ISequence seq = seqList.ElementAt(0);

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                Assert.AreEqual(SeqId, seq.ID);
                ApplicationLog.WriteLine(
                    "GenBank Formatter P1: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                GenBankMetadata metadata =
                    (GenBankMetadata)seq.Metadata["GenBank"];
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType,
                        metadata.Locus.Strand.ToString());
                }
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                        metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                }
                if (metadata.Locus.DivisionCode != SequenceDivisionCode.None)
                {
                    Assert.AreEqual(Div,
                        metadata.Locus.DivisionCode.ToString());
                }
                Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                    metadata.Locus.Date);

                if (0 != string.Compare(AlphabetName, "rna",
                     CultureInfo.CurrentCulture, CompareOptions.IgnoreCase))
                {
                    Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");
                }
                else
                {
                    ApplicationLog.WriteLine(
                        "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date Properties");
                }

                string truncatedExpectedSequence =
                    ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "").ToUpper(
                    CultureInfo.CurrentCulture);
                string truncatedActualSequence =
                    new string(seq.Select(a => (char)a).ToArray()).Replace("\r", "").Replace("\n", "").Replace(" ", "").ToUpper(
                    CultureInfo.CurrentCulture);

                // test the sequence string
                Assert.AreEqual(truncatedExpectedSequence, truncatedActualSequence);
                ApplicationLog.WriteLine(
                    "GenBank Formatter P1: Successfully validated the Sequence");
                File.Delete(tempFileName);
        }
Esempio n. 11
0
        /// <summary>
        /// Validates GenBank Formatter for the files which are 
        /// Parsed using GenBankParser for General test cases.
        /// </summary>
        private static void ValidateParseWriterGeneralTestCases()
        {
            Assert.IsTrue(File.Exists(FilePath));
            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format("GenBank Parser : File Exists in the Path '{0}'.", FilePath));
            string tempFileName = Path.GetTempFileName();

            // parse
            ISequenceParser parserObj = new GenBankParser();
            {
                IEnumerable<ISequence> seqList = parserObj.Parse(FilePath);
                ISequence seq = seqList.ElementAt(0);
                ISequenceFormatter formatter = new GenBankFormatter();
                {
                    formatter.Format(seq, tempFileName);
                    FilePath = tempFileName;
                    ValidateParserGeneralTestCases();
                }
            }
        }