/// <summary>
/// Read/write round trip: parses the first sequence from
/// <c>_genBankFile_WithMultipleDBLines</c>, formats it to a temporary file and asserts that
/// <c>CompareFiles</c> returns true for the written file and the original input.
/// </summary>
public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
{
    string tempFileName = Path.GetTempFileName();
    try
    {
        ISequenceParser parser1 = new GenBankParser();
        using (parser1.Open(_genBankFile_WithMultipleDBLines))
        {
            var orgSeq = parser1.Parse().First();

            ISequenceFormatter formatter = new GenBankFormatter();
            using (formatter.Open(tempFileName))
            {
                formatter.Format(orgSeq);
                // Explicit Close kept from the original test; the using block disposes the
                // handle returned by Open afterwards.
                formatter.Close();
            }
        }

        var same = CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);
        Assert.IsTrue(same);
        ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
    }
    finally
    {
        // Remove the temp file even when parsing, formatting or the comparison throws,
        // so failed runs do not leave files behind in the temp directory.
        File.Delete(tempFileName);
    }
}
/// <summary>
/// Parses <c>_singleProteinSeqGenBankFilename</c> with the parser alphabet explicitly set to
/// <c>Alphabets.DNA</c>, formats the first parsed sequence to <c>TempGenBankFileName</c> and
/// compares the written text with <c>_singleProteinSeqGenBankFileExpectedOutput</c>, ignoring
/// spaces and normalizing "\r\n" in the expected text to <c>Environment.NewLine</c>.
/// </summary>
public void TestGenBankWhenUserSetsProteinAlphabet()
{
    // NOTE(review): the method name mentions a protein alphabet but the code sets
    // Alphabets.DNA — confirm which one is intended.
    ISequenceParser parser = new GenBankParser();
    parser.Alphabet = Alphabets.DNA;
    ISequence seq = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

    // FirstOrDefault yields null for an empty parse result; fail with a clear message
    // instead of a NullReferenceException on the next line.
    Assert.IsNotNull(seq, "No sequence was parsed from the input file.");
    Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

    // Format to the temp file and read the result back.
    string actual;
    try
    {
        ISequenceFormatter formatter = new GenBankFormatter();
        using (formatter.Open(TempGenBankFileName))
        {
            formatter.Format(seq);
        }

        using (StreamReader reader = new StreamReader(TempGenBankFileName))
        {
            actual = reader.ReadToEnd();
        }
    }
    finally
    {
        // Clean up even if formatting or reading throws.
        File.Delete(TempGenBankFileName);
    }

    // Compare the formatted output with the expected text, ignoring spaces.
    Assert.AreEqual(
        _singleProteinSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine),
        actual.Replace(" ", ""));
}
/// <summary>
/// Smoke test over every "*.gbk" file in <c>_genBankDataPath</c>: each file is parsed, the
/// parsed sequences are formatted to <c>TempGenBankFileName</c>, and the written file is read
/// back. No content assertions are made; the test passes when none of these steps throws.
/// </summary>
public void TestGenBankForManyFiles()
{
    DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);
    foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
    {
        ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);
        IEnumerable<ISequence> seqList = new GenBankParser().Parse(fileInfo.FullName);

        try
        {
            // Declared with the concrete type so the list overload of Format can be called
            // directly, without an unchecked 'as' cast.
            var formatter = new GenBankFormatter();
            using (formatter.Open(TempGenBankFileName))
            {
                formatter.Format(seqList.ToList());
            }

            // Read the output back to confirm the written file can be opened and consumed.
            using (var reader = new StreamReader(TempGenBankFileName))
            {
                reader.ReadToEnd();
            }
        }
        finally
        {
            // Always remove the temp file so a failure on one input does not leave it behind.
            File.Delete(TempGenBankFileName);
        }
    }
}
/// <summary>
/// Builds a sequence from <c>ExpectedSequence</c> (with "\r", "\n" and spaces removed) plus
/// the "GenBank" metadata and ID of the first sequence parsed from <c>FilePath</c>, writes it
/// to a temporary file with <c>GenBankFormatter</c>, parses that file again and validates
/// the alphabet, ID, locus fields, version fields and residues of the result.
/// </summary>
public void GenBankFormatterValidateWriteUsingStream()
{
    InitializeXmlVariables();

    string tempFileName = Path.GetTempFileName();
    try
    {
        // Take the first parsed sequence once; its metadata and ID are copied onto the
        // sequence that gets written.
        ISequenceParser parser1 = new GenBankParser();
        ISequence sourceSeq = parser1.Parse(FilePath).ElementAt(0);

        string expectedUpdatedSequence = ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
        var orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
        orgSeq.Metadata.Add("GenBank", sourceSeq.Metadata["GenBank"]);
        orgSeq.ID = sourceSeq.ID;

        ISequenceFormatter formatter = new GenBankFormatter();
        using (formatter.Open(tempFileName))
        {
            formatter.Format(orgSeq);
        }

        // Parse the written file back and validate it.
        var parserObj = new GenBankParser();
        ISequence seq = parserObj.Parse(tempFileName).ElementAt(0);

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
        Assert.AreEqual(SeqId, seq.ID);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // Test the metadata that is tricky to parse and will not be tested implicitly by
        // testing the formatting.
        var metadata = (GenBankMetadata) seq.Metadata["GenBank"];

        // The strand is only compared when the parsed locus actually carries one.
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
        Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
        Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // Test the sequence string.
        Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char) a).ToArray()));
        ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
    }
    finally
    {
        // Delete the temp file even when an assertion above fails; previously the delete
        // was the last statement and was skipped on any failure.
        File.Delete(tempFileName);
    }
}