Example #1
0
 public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
 {
     // Create a Sequence with all attributes.
     // parse and update the properties instead of parsing entire file.   
     string tempFileName = Path.GetTempFileName();
     ISequenceParser parser1 = new GenBankParser();
     using (parser1.Open(_genBankFile_WithMultipleDBLines))
     {
         var orgSeq = parser1.Parse().First();
         ISequenceFormatter formatter = new GenBankFormatter();
         using (formatter.Open(tempFileName))
         {
             formatter.Format(orgSeq);
             formatter.Close();
         }
     }
     var same = CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);
     File.Delete(tempFileName);
     Assert.IsTrue(same);
     ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
 }
Example #2
0
        public void TestGenBankWhenUserSetsProteinAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser();
            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter();

            using (formatter.Open(TempGenBankFileName))
                formatter.Format(seq);

            string actual = string.Empty;
            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }

            File.Delete(TempGenBankFileName);
            // test the formatting
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine), actual.Replace(" ", ""));
        }
Example #3
0
        public void TestGenBankForManyFiles()
        {
            // parser and formatter will be used for all files in input dir

            // iterate through the files in input dir, parsing and formatting each; write results
            // to log file
            DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);
            foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);

                IEnumerable<ISequence> seqList = new GenBankParser().Parse(fileInfo.FullName);

                ISequenceFormatter formatter = new GenBankFormatter();
                using (formatter.Open(TempGenBankFileName))
                {
                    (formatter as GenBankFormatter).Format(seqList.ToList());
                }

                using (var reader = new StreamReader(TempGenBankFileName))
                {
                    string actual = reader.ReadToEnd();
                }
                
                File.Delete(TempGenBankFileName);
            }
        }
Example #4
0
        public void GenBankFormatterValidateWriteUsingStream()
        {
            InitializeXmlVariables();

            // Create a Sequence with all attributes.
            // Parse and update the properties instead of parsing entire file.            
            ISequenceParser parser1 = new GenBankParser();
            {
                IEnumerable<ISequence> seqList1 = parser1.Parse(FilePath);
                string tempFileName = Path.GetTempFileName();
                GenBankMetadata metadata = null;
                ISequence seq = null;
                string expectedUpdatedSequence =
                    ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq =
                    new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
                orgSeq.Metadata.Add("GenBank",
                                    seqList1.ElementAt(0).Metadata["GenBank"]);
                orgSeq.ID = seqList1.ElementAt(0).ID;

                ISequenceFormatter formatter = new GenBankFormatter();
                {
                    using (formatter.Open(tempFileName))
                    {
                        formatter.Format(orgSeq);
                    }
                }

                var parserObj = new GenBankParser();
                {
                    IEnumerable<ISequence> seqList = parserObj.Parse(tempFileName);
                    seq = seqList.ElementAt(0);
                    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                    Assert.AreEqual(SeqId, seq.ID);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                    // test the metadata that is tricky to parse, and will not be tested implicitly by
                    // testing the formatting 
                    metadata = (GenBankMetadata) seq.Metadata["GenBank"];
                    if (metadata.Locus.Strand != SequenceStrandType.None)
                    {
                        Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                    }
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string            
                Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char) a).ToArray()));
                ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
                File.Delete(tempFileName);
            }
        }