/// <summary>
/// Verifies that both ParseOne and Parse throw when given a reader over an empty string.
/// </summary>
public void TestGenBankFailureWhenParsingEmpty()
{
    ISequenceParser genBankParser = new GenBankParser();

    // ParseOne: any exception counts as the expected rejection.
    bool parseOneThrew = false;
    try
    {
        genBankParser.ParseOne(new StringReader(string.Empty));
    }
    catch (Exception)
    {
        parseOneThrew = true;
    }

    // Assert.Fail stays outside the try so the catch-all above cannot swallow it.
    if (!parseOneThrew)
    {
        Assert.Fail("Failed to throw exception for calling ParseOne on reader containing empty string.");
    }

    // Parse: same expectation.
    bool parseThrew = false;
    try
    {
        genBankParser.Parse(new StringReader(string.Empty));
    }
    catch (Exception)
    {
        parseThrew = true;
    }

    if (!parseThrew)
    {
        Assert.Fail("Failed to throw exception for calling Parse on reader containing empty string.");
    }
}
/// <summary>
/// Parses a single DNA GenBank file with the alphabet explicitly set to DNA, writes the
/// sequence back out through the formatter and compares the output (ignoring spaces)
/// with the expected text.
/// </summary>
public void TestGenBankWhenUserSetsDnaAlphabet()
{
    string actual;

    // set correct alphabet and parse
    // The parser stays open while the parsed sequence is in use and is released even
    // when an assertion fails.
    using (ISequenceParser parser = new GenBankParser(_singleDnaSeqGenBankFilename))
    {
        parser.Alphabet = Alphabets.DNA;
        ISequence seq = parser.Parse().FirstOrDefault();
        Assert.IsNotNull(seq);
        Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

        try
        {
            // format
            using (ISequenceFormatter formatter = new GenBankFormatter(TempGenBankFileName))
            {
                // Direct cast: a wrong type surfaces as InvalidCastException instead of a
                // NullReferenceException from an unchecked "as".
                ((GenBankFormatter)formatter).Write(seq);
                formatter.Close();
            }

            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }
        }
        finally
        {
            // Remove the temp file on success and on failure alike.
            File.Delete(TempGenBankFileName);
        }
    }

    // test the formatting
    Assert.AreEqual(_singleDnaSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
}
/// <summary>
/// Builds a sequence from the configured GenBank file, writes it through a formatter
/// opened on a StreamWriter, parses the written file back and validates alphabet, ID,
/// locus/version metadata and the sequence text.
/// </summary>
public void GenBankFormatterValidateWriteUsingStream()
{
    InitializeXmlVariables();

    // Create a Sequence with all attributes.
    // Parse and update the properties instead of parsing entire file.
    using (ISequenceParser parser1 = new GenBankParser(FilePath))
    {
        // Take the first record once instead of enumerating the parse result twice.
        ISequence sourceSeq = parser1.Parse().ElementAt(0);
        string tempFileName = System.IO.Path.GetTempFileName();

        try
        {
            GenBankMetadata metadata = null;
            ISequence seq = null;
            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
            Sequence orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
            orgSeq.Metadata.Add("GenBank", (GenBankMetadata)sourceSeq.Metadata["GenBank"]);
            orgSeq.ID = sourceSeq.ID;

            using (ISequenceFormatter formatter = new GenBankFormatter())
            {
                using (StreamWriter writer = new StreamWriter(tempFileName))
                {
                    formatter.Open(writer);
                    formatter.Write(orgSeq);
                }
            }

            using (GenBankParser parserObj = new GenBankParser(tempFileName))
            {
                IEnumerable<ISequence> seqList = parserObj.Parse();
                seq = seqList.ElementAt(0);
                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                Assert.AreEqual(SeqId, seq.ID);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                metadata = (GenBankMetadata)seq.Metadata["GenBank"];
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                }
            }

            Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
            Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
            Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // test the sequence string
            Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
            ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
            Console.WriteLine(string.Format((IFormatProvider)null,
                "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
                ExpectedSequence));
        }
        finally
        {
            // Delete the temp file even when one of the assertions above fails.
            File.Delete(tempFileName);
        }
    }
}
/// <summary>
/// Parses the single "protein" GenBank test file with the alphabet set to DNA, formats
/// it to a temp file and compares the output (ignoring spaces) with the expected text.
/// </summary>
public void TestGenBankWhenUserSetsProteinAlphabet()
{
    // Parse with the alphabet fixed up front.
    ISequenceParser genBankParser = new GenBankParser();
    genBankParser.Alphabet = Alphabets.DNA;
    ISequence parsed = genBankParser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();
    Assert.AreEqual(Alphabets.DNA, parsed.Alphabet);

    // Format to the temp file.
    ISequenceFormatter genBankFormatter = new GenBankFormatter();
    using (genBankFormatter.Open(TempGenBankFileName))
    {
        genBankFormatter.Format(parsed);
    }

    // Read back what was written, then clean up.
    string written = string.Empty;
    using (StreamReader tempReader = new StreamReader(TempGenBankFileName))
    {
        written = tempReader.ReadToEnd();
    }

    File.Delete(TempGenBankFileName);

    // Compare with spaces removed; expected line endings are mapped to the platform's.
    string expectedText = _singleProteinSeqGenBankFileExpectedOutput
        .Replace(" ", "")
        .Replace("\r\n", Environment.NewLine);
    string actualText = written.Replace(" ", "");
    Assert.AreEqual(expectedText, actualText);
}
/// <summary>
/// Parses every *.gbk file in the GenBank data directory and formats each sequence to a
/// string, logging progress. Any exception is logged and rethrown.
/// </summary>
public void TestGenBankForManyFiles()
{
    // One parser and one formatter are shared across all input files.
    ISequenceParser sharedParser = new GenBankParser();
    ISequenceFormatter sharedFormatter = new GenBankFormatter();

    FileInfo[] genBankFiles = new DirectoryInfo(_genBankDataPath).GetFiles("*.gbk");
    foreach (FileInfo genBankFile in genBankFiles)
    {
        ApplicationLog.WriteLine("Parsing file {0}...{1}", genBankFile.FullName, Environment.NewLine);

        try
        {
            foreach (Sequence parsedSequence in sharedParser.Parse(genBankFile.FullName))
            {
                // The formatted text is discarded; the call only has to succeed.
                sharedFormatter.FormatString(parsedSequence);
            }

            ApplicationLog.WriteLine("Parse completed successfully." + Environment.NewLine);
        }
        catch (Exception failure)
        {
            ApplicationLog.WriteLine("Parse failed:" + Environment.NewLine + failure + Environment.NewLine);
            throw;
        }
    }
}
/// <summary>
/// Parses the single DNA GenBank test file with the alphabet set to DNA, formats it to a
/// temp file and compares the whitespace-normalized output with the expected text.
/// </summary>
public void TestGenBankWhenUserSetsDnaAlphabet()
{
    // Parse with the alphabet fixed up front.
    ISequenceParser genBankParser = new GenBankParser();
    genBankParser.Alphabet = Alphabets.DNA;
    ISequence parsed = genBankParser.Parse(_singleDnaSeqGenBankFilename).FirstOrDefault();
    Assert.AreEqual(Alphabets.DNA, parsed.Alphabet);

    // Format straight to the temp file.
    ISequenceFormatter genBankFormatter = new GenBankFormatter();
    genBankFormatter.Format(parsed, TempGenBankFileName);

    // Read back what was written, then clean up.
    string written = string.Empty;
    using (StreamReader tempReader = new StreamReader(TempGenBankFileName))
    {
        written = tempReader.ReadToEnd();
    }

    File.Delete(TempGenBankFileName);

    // Compare after whitespace cleanup on both sides.
    string expectedText = Utility.CleanupWhiteSpace(_singleDnaSeqGenBankFileExpectedOutput);
    string actualText = Utility.CleanupWhiteSpace(written);
    Assert.AreEqual(expectedText, actualText);
}
/// <summary>
/// Parses the file named by InputFile with a GenBankParser, stores the result in
/// SequenceList and reports the activity as closed.
/// </summary>
/// <param name="executionContext">Execution context supplied by the caller; not used here.</param>
/// <returns>Always ActivityExecutionStatus.Closed.</returns>
protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
{
    String sourcePath = InputFile;
    SequenceList = new GenBankParser().Parse(sourcePath);
    return ActivityExecutionStatus.Closed;
}
/// <summary>
/// Parses the locus-token test file and checks that a first sequence is produced.
/// </summary>
public void TestGenBankLocusTokenParser()
{
    GenBankParser genBankParser = new GenBankParser();
    var parsedSequences = genBankParser.Parse(_genBankFile_LocusTokenParserTest);
    Assert.IsNotNull(parsedSequences.FirstOrDefault());
}
/// <summary>
/// Parses the empty-version test file and checks that a first sequence is produced.
/// </summary>
public void TestGenBankParseVersionEmpty()
{
    GenBankParser genBankParser = new GenBankParser();
    var parsedSequences = genBankParser.Parse(_genBankFile_ParseVersionEmpty);
    Assert.IsNotNull(parsedSequences.FirstOrDefault());
}
/// <summary>
/// Test parsing Primary header which contains table with header.
/// REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP
/// </summary>
public void TestParsingREFSEQPrimaryHeader()
{
    // Dispose the parser so the input file handle is released even on failure.
    using (GenBankParser parser = new GenBankParser(_genBankFile_WithREFSEQPrimaryData))
    {
        // Walk the result so the file is really parsed; if Parse() is deferred
        // (presumably it is - confirm), calling it without enumerating executes nothing.
        foreach (var sequence in parser.Parse())
        {
            Assert.IsNotNull(sequence);
        }
    }
}
/// <summary>
/// Parses the shifted-origin test file and checks that a first sequence is produced.
/// </summary>
public void TestGenBankParseOriginShifted2()
{
    GenBankParser genBankParser = new GenBankParser();
    var parsedSequences = genBankParser.Parse(_genBankFile_ParseOriginShifted2);
    Assert.IsNotNull(parsedSequences.FirstOrDefault());
}
/// <summary>
/// Parses the empty-organism-classification test file and checks that a first sequence
/// is produced.
/// </summary>
public void TestGenBankEmptyOrganismClassification()
{
    GenBankParser genBankParser = new GenBankParser();
    var parsedSequences = genBankParser.Parse(_genBankFile_EmptyOrganismClassificationTest);
    Assert.IsNotNull(parsedSequences.FirstOrDefault());
}
/// <summary>
/// Parses the configured GenBank file, writes the first sequence to a temp file through
/// a file-path formatter, parses that file back and validates alphabet, ID,
/// locus/version metadata and the sequence text.
/// </summary>
public void GenBankFormatterWithParseValidateWriteFilePath()
{
    InitializeXmlVariables();

    // parse
    using (ISequenceParser parserObj = new GenBankParser(FilePath))
    {
        IEnumerable<ISequence> seqList = parserObj.Parse();
        ISequence seq = seqList.ElementAt(0);
        string tempFileName = System.IO.Path.GetTempFileName();

        try
        {
            using (ISequenceFormatter formatter = new GenBankFormatter(tempFileName))
            {
                formatter.Write(seq);
                formatter.Close();
            }

            // parse
            // The using block releases the second parser even when an assertion fails.
            using (ISequenceParser parserObjFromFile = new GenBankParser(tempFileName))
            {
                seqList = parserObjFromFile.Parse();
                seq = seqList.ElementAt(0);
                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                Assert.AreEqual(SeqId, seq.ID);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                }

                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                    metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the Sequence");
                Console.WriteLine(string.Format((IFormatProvider)null,
                    "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
                    ExpectedSequence));

                parserObjFromFile.Close();
            }
        }
        finally
        {
            // Delete the temp file on success and on failure alike.
            File.Delete(tempFileName);
        }
    }
}
/// <summary>
/// Parses the single-sequence GenBank test file and checks its alphabet, ID, locus and
/// version metadata, reference/feature counts and sequence text, then formats it to a
/// temp file and compares the whitespace-normalized output with the expected text.
/// </summary>
public void TestGenBankWhenParsingOne()
{
    // Parse the first record of the file.
    ISequenceParser genBankParser = new GenBankParser();
    ISequence parsed = genBankParser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

    // Non-metadata properties.
    Assert.AreEqual(Alphabets.DNA, parsed.Alphabet);
    Assert.AreEqual("SCU49845", parsed.ID);

    // Metadata that is tricky to parse and is not covered implicitly by the
    // formatting comparison at the end.
    GenBankMetadata genBankData = (GenBankMetadata)parsed.Metadata["GenBank"];
    Assert.AreEqual(genBankData.Locus.Strand, SequenceStrandType.None);
    string topologyLower = genBankData.Locus.StrandTopology.ToString().ToLower(CultureInfo.CurrentCulture);
    Assert.AreEqual("none", topologyLower);
    Assert.AreEqual("PLN", genBankData.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse("21-JUN-1999", (IFormatProvider)null), genBankData.Locus.Date);
    Assert.AreEqual("1", genBankData.Version.Version);
    Assert.AreEqual("1293613", genBankData.Version.GiNumber);

    // Every kind of metadata should have landed in the right place.
    Assert.AreEqual(1, parsed.Metadata.Count);
    IList<CitationReference> citations = genBankData.References;
    Assert.AreEqual(3, citations.Count);
    IList<FeatureItem> allFeatures = genBankData.Features.All;
    Assert.AreEqual(6, allFeatures.Count);
    Assert.AreEqual(4, allFeatures[0].Qualifiers.Count);
    Assert.AreEqual(5, allFeatures[1].Qualifiers.Count);
    Assert.AreEqual(1, allFeatures[2].Qualifiers.Count);

    // Sequence text. The literal is kept exactly as it appears in the file.
    string expected =
@"gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattg
gcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattt
taagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc";
    string parsedText = new string(parsed.Select(a => (char)a).ToArray());
    Assert.AreEqual(expected, parsedText);

    // Format straight to the temp file.
    ISequenceFormatter genBankFormatter = new GenBankFormatter();
    genBankFormatter.Format(parsed, TempGenBankFileName);

    // Read back what was written, then clean up.
    string written = string.Empty;
    using (StreamReader tempReader = new StreamReader(TempGenBankFileName))
    {
        written = tempReader.ReadToEnd();
    }

    File.Delete(TempGenBankFileName);

    // Compare after whitespace cleanup on both sides.
    string expectedText = Utility.CleanupWhiteSpace(_singleProteinSeqGenBankFileExpectedOutput);
    string actualText = Utility.CleanupWhiteSpace(written);
    Assert.AreEqual(expectedText, actualText);
}
/// <summary>
/// Parses a GenBank file holding several records and checks the number of sequences and
/// the length of each.
/// </summary>
public void TestGenBankWhenParsingMultiple()
{
    // parse
    ISequenceParser parser = new GenBankParser();

    // Materialize once: Count() followed by two ElementAt() calls would otherwise
    // enumerate the parse result three times.
    List<ISequence> seqList = parser.Parse(_multipleSeqGenBankFilename).ToList();

    // Just check the number of items returned and that they're not empty. The guts
    // are tested in TestGenBankWhenParsingOne.
    Assert.AreEqual(2, seqList.Count);
    Assert.AreEqual(105, seqList[0].Count);
    Assert.AreEqual(5028, seqList[1].Count);
}
/// <summary>
/// Parses the configured GenBank file by file name and validates read-only state (when
/// expected), alphabet, molecule type, IDs, locus/version metadata and sequence text.
/// </summary>
public void GenBankParserValidateParseFileName()
{
    InitializeXmlVariables();

    // Parse and take the first record.
    ISequenceParser genBankParser = new GenBankParser();
    IList<ISequence> parsedList = genBankParser.Parse(FilePath);
    ISequence parsed = parsedList[0];

    // Non-metadata properties.
    bool expectReadOnly =
        string.Compare(IsSequenceReadOnly, "true", false, CultureInfo.CurrentCulture) == 0;
    if (expectReadOnly)
    {
        Assert.IsTrue(parsed.IsReadOnly);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the ReadOnly Property");
    }

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), parsed.Alphabet);
    Assert.AreEqual(Utility.GetMoleculeType(MolType), parsed.MoleculeType);
    Assert.AreEqual(SeqId, parsed.DisplayID);
    Assert.AreEqual(SeqId, parsed.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // Metadata that is tricky to parse and is not covered implicitly by
    // formatting tests.
    GenBankMetadata genBankData = (GenBankMetadata)parsed.Metadata["GenBank"];
    if (genBankData.Locus.Strand != SequenceStrandType.None)
    {
        Assert.AreEqual(StrandType, genBankData.Locus.Strand.ToString());
    }

    Assert.AreEqual(
        StrandTopology.ToUpper(CultureInfo.CurrentCulture),
        genBankData.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
    Assert.AreEqual(Div, genBankData.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankData.Locus.Date);
    Assert.AreEqual(Version, genBankData.Version.Version.ToString((IFormatProvider)null));
    Assert.AreEqual(PrimaryId, genBankData.Version.GINumber);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // Sequence text.
    Assert.AreEqual(ExpectedSequence, parsed.ToString());
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Sequence");
    string summary = string.Format((IFormatProvider)null,
        "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
        ExpectedSequence);
    Console.WriteLine(summary);
}
/// <summary>
/// Builds a sequence from the configured GenBank file, formats it to a temp file by
/// path, parses that file back and validates alphabet, ID, locus/version metadata and
/// the sequence text.
/// </summary>
public void GenBankFormatterValidateWriteWithFilePath()
{
    InitializeXmlVariables();

    ISequenceParser parserObj = new GenBankParser();

    // Take the first record once instead of enumerating the parse result twice.
    ISequence sourceSeq = parserObj.Parse(FilePath).ElementAt(0);
    string tempFileName = Path.GetTempFileName();

    try
    {
        string expectedUpdatedSequence =
            ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
        var orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence)
        {
            ID = sourceSeq.ID
        };
        orgSeq.Metadata.Add("GenBank", sourceSeq.Metadata["GenBank"]);

        ISequenceFormatter formatter = new GenBankFormatter();
        formatter.Format(orgSeq, tempFileName);

        // parse
        ISequenceParser parserObjFromFile = new GenBankParser();
        IEnumerable<ISequence> seqList = parserObjFromFile.Parse(tempFileName);
        ISequence seq = seqList.ElementAt(0);
        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
        Assert.AreEqual(SeqId, seq.ID);
        ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // test the metadata that is tricky to parse, and will not be tested implicitly by
        // testing the formatting
        // NOTE(review): this reads the metadata of orgSeq (the input to the formatter),
        // not of the re-parsed seq, so the round trip of these fields is not exercised.
        // Confirm whether seq.Metadata["GenBank"] was intended.
        var metadata = (GenBankMetadata)orgSeq.Metadata["GenBank"];
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
        Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
        Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // test the sequence string
        Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
        ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
    }
    finally
    {
        // Delete the temp file even when one of the assertions above fails.
        File.Delete(tempFileName);
    }
}
/// <summary>
/// Parses the configured GenBank file through a StreamReader and validates alphabet,
/// ID, locus/version metadata and sequence text of the first record.
/// </summary>
public void GenBankParserValidateParseFileNameWithStream()
{
    InitializeXmlVariables();

    List<ISequence> parsed = null;
    IEnumerable<ISequence> parseResult = null;

    using (ISequenceParser streamParser = new GenBankParser())
    {
        // The result is materialized while the reader is still open.
        using (StreamReader fileReader = new StreamReader(FilePath))
        {
            parseResult = streamParser.Parse(fileReader);
            parsed = parseResult.ToList();
        }

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), parsed[0].Alphabet);
        Assert.AreEqual(SeqId, parsed[0].ID);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // Metadata that is tricky to parse and is not covered implicitly by
        // formatting tests.
        GenBankMetadata genBankData = (GenBankMetadata)parsed[0].Metadata["GenBank"];
        if (genBankData.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, genBankData.Locus.Strand.ToString());
        }

        Assert.AreEqual(
            StrandTopology.ToUpper(CultureInfo.CurrentCulture),
            genBankData.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, genBankData.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankData.Locus.Date);
        Assert.AreEqual(Version, genBankData.Version.Version.ToString((IFormatProvider)null));
        Assert.AreEqual(PrimaryId, genBankData.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // Sequence text.
        string parsedText = new string(parsed[0].Select(a => (char)a).ToArray());
        Assert.AreEqual(ExpectedSequence, parsedText);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Sequence");
        string summary = string.Format((IFormatProvider)null,
            "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
            ExpectedSequence);
        Console.WriteLine(summary);
    }
}
/// <summary>
/// Parses a GenBank file, checks the first sequence against the expected text and
/// length, then wraps it in a BasicDerivedSequence and checks that the derived
/// sequence renders the same text.
/// </summary>
public void ValidateBasicDerivedSequenceWithGenBankFormat()
{
    // Expected sequence and input path come from the XML configuration.
    string expectedSequence = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.ExpectedSequenceNode);
    string geneBankFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.FilePathNode);

    Assert.IsTrue(File.Exists(geneBankFilePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(
        "Sequence BVT: Sequence BVT: The File exist in the Path " + geneBankFilePath);

    // Parse the GenBank file and take the first record as a Sequence.
    ISequenceParser genBankParser = new GenBankParser();
    IList<ISequence> parsedList = genBankParser.Parse(geneBankFilePath);
    Assert.IsNotNull(parsedList);

    Sequence geneBankSeq = (Sequence)parsedList[0];
    Assert.IsNotNull(geneBankSeq);
    Assert.AreEqual(expectedSequence, geneBankSeq.ToString());
    ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence is as expected.");

    // Copy the items out as bytes and compare the length.
    byte[] encodedCopy = new byte[geneBankSeq.Count];
    IList<byte> asByteList = geneBankSeq as IList<byte>;
    asByteList.CopyTo(encodedCopy, 0);
    Assert.AreEqual(expectedSequence.Length, encodedCopy.Length);
    ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence Length is as expected.");

    // Wrap the parsed sequence in a derived sequence.
    BasicDerivedSequence genebankDerivedSeq =
        new BasicDerivedSequence(geneBankSeq, false, false, -1, -1);

    // The derived sequence must render exactly like the original.
    Assert.IsNotNull(genebankDerivedSeq);
    Assert.AreEqual(expectedSequence, genebankDerivedSeq.ToString());
    Assert.AreEqual(geneBankSeq.ToString(), genebankDerivedSeq.ToString());
    ApplicationLog.WriteLine("Sequence BVT: The BasicDerived Sequence is as expected.");

    // Logs to Nunit GUI.
    Console.WriteLine(
        "Sequence BVT: Validation of GenBank file Sequence is completed successfully.");
}
/// <summary>
/// Parses a GenBank file, makes the first sequence writable, appends one item to it and
/// validates the resulting text and item count against the configured expectations.
/// </summary>
public void ValidateSequenceInsertWithGenBankFormat()
{
    // Gets the expected values from the Xml
    string expectedSequenceCount = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.SimpleFastaSequenceCount);
    string geneBankFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.FilePathNode);
    string expectedSeqAfterAdd = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.ExpectedSeqAfterAdd);
    string alphabetName = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.AlphabetNameNode);
    string actualSequence = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.ExpectedSingleChar);

    // Logs information to the log file
    // (The format string has a single placeholder, so only one argument is passed.)
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "Sequence BVT: Sequence {0} is expected.", actualSequence));

    Sequence seqItem = new Sequence(Utility.GetAlphabet(alphabetName), "TCGN");

    Assert.IsTrue(File.Exists(geneBankFilePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "Sequence BVT: The File exist in the Path {0}.", geneBankFilePath));

    // Parse the GenBank file and take the first record as a Sequence.
    ISequenceParser parser = new GenBankParser();
    IList<ISequence> sequence = parser.Parse(geneBankFilePath);
    Sequence Seq = (Sequence)sequence[0];
    Seq.IsReadOnly = false;
    string seqBeforeAdding = Seq.ToString();
    Seq.Add(seqItem[0]);

    // Validate the sequence after adding the item. Assert arguments are in
    // (expected, actual) order so failure messages label the values correctly.
    string seqAfterAdd = Seq.ToString();
    Assert.AreEqual(expectedSeqAfterAdd, seqAfterAdd);
    Assert.AreNotEqual(seqBeforeAdding, seqAfterAdd);
    Assert.AreEqual(expectedSequenceCount, Seq.Count.ToString((IFormatProvider)null));
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "Sequence BVT: Sequence {0} is expected.", seqAfterAdd));
}
/// <summary>
/// Validates feature access on parsed GenBank metadata: first on the single-sequence
/// test file (coding sequences, location-based lookups, citations), then on
/// NC_001284.gbk (per-type feature counts and the translation of the first coding
/// sequence).
/// </summary>
public void GenBankFeatures()
{
    GenBankMetadata metadata;

    // parse
    // The using blocks release each parser even when an assertion fails.
    using (ISequenceParser parser = new GenBankParser(_singleProteinSeqGenBankFilename))
    {
        ISequence seq = parser.Parse().FirstOrDefault();
        Assert.IsNotNull(seq);

        // Direct cast: a wrong type surfaces immediately instead of as a later
        // NullReferenceException from an unchecked "as".
        metadata = (GenBankMetadata)seq.Metadata["GenBank"];
        List<CodingSequence> CDS = metadata.Features.CodingSequences;

        // Assert arguments are in (expected, actual) order.
        Assert.AreEqual(3, CDS.Count);
        Assert.AreEqual(1, CDS[0].DatabaseCrossReference.Count);
        Assert.AreEqual(string.Empty, CDS[0].GeneSymbol);
        Assert.AreEqual(1, metadata.Features.GetFeatures("source").Count);
        Assert.IsFalse(CDS[0].Pseudo);
        Assert.AreEqual(2, metadata.GetFeatures(1, 109).Count);
        Assert.AreEqual(2, metadata.GetFeatures(1, 10).Count);
        Assert.AreEqual(2, metadata.GetFeatures(10, 100).Count);
        Assert.AreEqual(2, metadata.GetFeatures(120, 150).Count);
        Assert.AreEqual(0, metadata.GetCitationsReferredInFeatures().Count);

        parser.Close();
    }

    // Path.Combine instead of a hard-coded backslash separator.
    string secondFile = System.IO.Path.Combine(_genBankDataPath, "NC_001284.gbk");
    using (ISequenceParser parser1 = new GenBankParser(secondFile))
    {
        ISequence seq1 = parser1.Parse().FirstOrDefault();
        Assert.IsNotNull(seq1);
        metadata = (GenBankMetadata)seq1.Metadata["GenBank"];

        Assert.AreEqual(743, metadata.Features.All.Count);
        Assert.AreEqual(117, metadata.Features.CodingSequences.Count);
        Assert.AreEqual(32, metadata.Features.Exons.Count);
        Assert.AreEqual(22, metadata.Features.Introns.Count);
        Assert.AreEqual(60, metadata.Features.Genes.Count);
        Assert.AreEqual(455, metadata.Features.MiscFeatures.Count);
        Assert.AreEqual(17, metadata.Features.Promoters.Count);
        Assert.AreEqual(21, metadata.Features.TransferRNAs.Count);
        Assert.AreEqual(117,
            metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);
        Assert.AreEqual(117, metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count);

        // Compare the recorded translation of the first coding sequence with the text
        // of the sequence GetTranslation() returns (surrounding quotes ignored).
        ISequence seqTemp = metadata.Features.CodingSequences[0].GetTranslation();
        byte[] tempData = seqTemp.ToArray();
        string sequenceInString = ASCIIEncoding.ASCII.GetString(tempData);
        Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), sequenceInString.Trim('"'));

        Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
    }
}
/// <summary>
/// Negative-test helper: configures a GenBankParser for the given XML node in a way
/// that is expected to be rejected, and fails the test when no exception is thrown.
/// </summary>
/// <param name="node">XML node name that selects the input file and the invalid setting.</param>
void InvalidateGenBankParser(string node)
{
    // Initialization of xml strings.
    FilePath = utilityObj.xmlUtil.GetTextValue(node, Constants.FilePathNode);

    bool exceptionThrown = false;
    try
    {
        using (GenBankParser parserObj = new GenBankParser(FilePath))
        {
            if (string.Equals(Constants.SimpleGenBankNodeName, node))
            {
                parserObj.LocationBuilder = null;
            }
            else if (string.Equals(Constants.SimpleGenBankPrimaryNode, node))
            {
                parserObj.Alphabet = Alphabets.RNA;
            }

            // Walk the result so parsing really runs; if Parse() is deferred
            // (presumably it is - confirm), calling it alone executes nothing.
            foreach (var parsed in parserObj.Parse())
            {
            }
        }
    }
    catch (Exception)
    {
        // InvalidOperationException, InvalidDataException and any other exception were
        // all handled identically, so a single handler covers them.
        exceptionThrown = true;
        ApplicationLog.WriteLine(
            "GenBank Parser : Successfully validated the exception:");
        Console.WriteLine(
            "GenBank Parser : Successfully validated the exception:");
    }

    // Fail outside the try: inside it, the catch-all above would swallow the assertion
    // failure and the test could never fail.
    if (!exceptionThrown)
    {
        Assert.Fail();
    }
}
/// <summary>
/// Verifies that calling Parse() on a parser created without any input throws.
/// </summary>
public void TestGenBankFailureWhenParsingEmpty()
{
    bool threw = false;

    try
    {
        ISequenceParser emptyParser = new GenBankParser();
        emptyParser.Parse();
    }
    catch (Exception)
    {
        // Any exception counts as the expected rejection.
        threw = true;
    }

    // Assert.Fail stays outside the try so the catch-all above cannot swallow it.
    if (!threw)
    {
        Assert.Fail("Failed to throw exception for calling ParseOne on reader containing empty string.");
    }
}
/// <summary>
/// Parses every *.gbk file in the GenBank data directory, writes the parsed sequences
/// to a temp file through the formatter and reads that file back, logging progress.
/// Any exception is logged and rethrown.
/// </summary>
public void TestGenBankForManyFiles()
{
    // iterate through the files in input dir, parsing and formatting each; write results
    // to log file
    DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);
    foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
    {
        ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);

        // The using block releases the parser for this file even when parsing or
        // formatting throws.
        using (ISequenceParser parser = new GenBankParser(fileInfo.FullName))
        {
            try
            {
                List<ISequence> seqList = parser.Parse().ToList();

                try
                {
                    // Close and dispose the formatter before the file is read back, so
                    // the content is flushed and the file is no longer held open.
                    using (ISequenceFormatter formatter = new GenBankFormatter(TempGenBankFileName))
                    {
                        ((GenBankFormatter)formatter).Write(seqList);
                        formatter.Close();
                    }

                    // don't do anything with it; just make sure it doesn't crash
                    using (StreamReader reader = new StreamReader(TempGenBankFileName))
                    {
                        reader.ReadToEnd();
                    }
                }
                finally
                {
                    // Remove the temp file on success and on failure alike.
                    File.Delete(TempGenBankFileName);
                }

                parser.Close();
                ApplicationLog.WriteLine("Parse completed successfully." + Environment.NewLine);
            }
            catch (Exception e)
            {
                ApplicationLog.WriteLine("Parse failed:" + Environment.NewLine + e + Environment.NewLine);
                throw;
            }
        }
    }
}
/// <summary>
/// Parses the configured GenBank file by file name and validates alphabet, ID,
/// locus/version metadata and sequence text of the first record.
/// </summary>
public void GenBankParserValidateParseFileName()
{
    InitializeXmlVariables();

    // Parse and take the first record.
    ISequenceParser genBankParser = new GenBankParser();
    IEnumerable<ISequence> parseResult = genBankParser.Parse(FilePath);
    ISequence parsed = parseResult.ElementAt(0);

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), parsed.Alphabet);
    Assert.AreEqual(SeqId, parsed.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // Metadata that is tricky to parse and is not covered implicitly by
    // formatting tests.
    var genBankData = (GenBankMetadata)parsed.Metadata["GenBank"];
    if (genBankData.Locus.Strand != SequenceStrandType.None)
    {
        Assert.AreEqual(StrandType, genBankData.Locus.Strand.ToString());
    }

    Assert.AreEqual(
        StrandTopology.ToUpper(CultureInfo.CurrentCulture),
        genBankData.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
    Assert.AreEqual(Div, genBankData.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankData.Locus.Date);
    Assert.AreEqual(Version, genBankData.Version.Version.ToString(null));
    Assert.AreEqual(PrimaryId, genBankData.Version.GiNumber);
    ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // Sequence text.
    Assert.AreEqual(ExpectedSequence, parsed.ConvertToString());
    ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the Sequence");
}
/// <summary>
/// Round-trip test for the GenBank file <c>_genBankFile_WithMultipleDBLines</c>: the
/// first sequence is parsed from it, formatted into a fresh temporary file, and the
/// temporary file is then compared with the original input via
/// <c>Utility.CompareFiles</c>. The test passes when the comparison reports the files
/// as the same.
/// </summary>
public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
{
    // Path.GetTempFileName creates the file on disk, so it must be removed on every
    // exit path - including when parsing, formatting or comparing throws.
    string tempFileName = Path.GetTempFileName();
    bool same;
    try
    {
        ISequenceParser parser1 = new GenBankParser();
        using (parser1.Open(_genBankFile_WithMultipleDBLines))
        {
            var orgSeq = parser1.Parse().First();

            ISequenceFormatter formatter = new GenBankFormatter();
            using (formatter.Open(tempFileName))
            {
                formatter.Format(orgSeq);
                formatter.Close();
            }
        }

        same = Utility.CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);
    }
    finally
    {
        // Both using blocks have completed by now, so the handles they own are released.
        File.Delete(tempFileName);
    }

    Assert.IsTrue(same);
    ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
}
/// <summary>
/// Negative test: configures a GenBank parser with the protein alphabet, parses
/// <c>_singleDnaSeqGenBankFilename</c> and requests the first sequence. The test passes
/// only when that raises an <c>InvalidDataException</c>; if no exception is raised the
/// test fails explicitly, and any other exception type propagates.
/// </summary>
public void TestGenBankWhenUserSetsIncorrectAlphabet()
{
    ISequenceParser proteinParser = new GenBankParser();
    proteinParser.Alphabet = Alphabets.Protein;

    try
    {
        var parsedSequences = proteinParser.Parse(_singleDnaSeqGenBankFilename);

        // Requesting the first element forces the parse to produce a sequence.
        parsedSequences.ElementAt(0);
    }
    catch (InvalidDataException)
    {
        // Expected outcome: the mismatched alphabet was rejected.
        return;
    }

    Assert.Fail("Failed to throw exception for trying to create sequence using incorrect alphabet.");
}
/// <summary>
/// Validates <c>Format(sequence, writer)</c> of the GenBank formatter. A sequence is
/// built from the expected sequence text plus the GenBank metadata, ID, display ID and
/// molecule type of the first sequence parsed from <c>FilePath</c>. It is formatted to
/// <c>Constants.GenBankTempFileName</c> through a <c>TextWriter</c>, parsed back, and
/// the re-parsed sequence is checked against the fixture fields. The temporary file is
/// deleted at the end.
/// </summary>
public void GenBankFormatterValidateFormatTextWriter()
{
    InitializeXmlVariables();

    // Build the sequence to format: expected text with CR, LF and spaces stripped,
    // decorated with the properties of the first sequence parsed from the input file.
    ISequenceParser sourceParser = new GenBankParser();
    IList<ISequence> sourceSequences = sourceParser.Parse(FilePath);
    string strippedSequenceText = ExpectedSequence
        .Replace("\r", "")
        .Replace("\n", "")
        .Replace(" ", "");
    Sequence sequenceToFormat = new Sequence(Utility.GetAlphabet(AlphabetName), strippedSequenceText);

    ISequence template = sourceSequences[0];
    sequenceToFormat.Metadata.Add("GenBank", (GenBankMetadata)template.Metadata["GenBank"]);
    sequenceToFormat.ID = template.ID;
    sequenceToFormat.DisplayID = template.DisplayID;
    sequenceToFormat.MoleculeType = template.MoleculeType;

    // Format to the temporary file through a TextWriter.
    ISequenceFormatter genBankFormatter = new GenBankFormatter();
    using (TextWriter output = new StreamWriter(Constants.GenBankTempFileName))
    {
        genBankFormatter.Format(sequenceToFormat, output);
    }

    // Parse the formatted file back.
    GenBankParser roundTripParser = new GenBankParser();
    IList<ISequence> roundTripSequences = roundTripParser.Parse(Constants.GenBankTempFileName);
    ISequence roundTripped = roundTripSequences[0];

    // Non-metadata properties. The read-only check only applies when the fixture
    // says the sequence is expected to be read-only.
    bool expectReadOnly =
        string.Compare(IsSequenceReadOnly, "true", false, CultureInfo.CurrentCulture) == 0;
    if (expectReadOnly)
    {
        Assert.IsTrue(roundTripped.IsReadOnly);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the ReadOnly Property");
    }

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), roundTripped.Alphabet);
    Assert.AreEqual(Utility.GetMoleculeType(MolType), roundTripped.MoleculeType);
    Assert.AreEqual(SeqId, roundTripped.DisplayID);
    Assert.AreEqual(SeqId, roundTripped.ID);
    ApplicationLog.WriteLine(
        "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // Metadata that is tricky to parse and is not covered implicitly by the
    // formatting tests.
    GenBankMetadata genBankMetadata = (GenBankMetadata)roundTripped.Metadata["GenBank"];

    // The strand type is only compared when the locus actually specifies one.
    bool strandIsSpecified = genBankMetadata.Locus.Strand != SequenceStrandType.None;
    if (strandIsSpecified)
    {
        Assert.AreEqual(StrandType, genBankMetadata.Locus.Strand.ToString());
    }

    // Topology is compared case-insensitively by upper-casing both sides.
    string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
    string actualTopology =
        genBankMetadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedTopology, actualTopology);

    Assert.AreEqual(Div, genBankMetadata.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankMetadata.Locus.Date);
    Assert.AreEqual(Version, genBankMetadata.Version.Version.ToString((IFormatProvider)null));
    Assert.AreEqual(PrimaryId, genBankMetadata.Version.GINumber);
    ApplicationLog.WriteLine(
        "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // Sequence text.
    Assert.AreEqual(ExpectedSequence, roundTripped.ToString());
    ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");

    string consoleMessage = string.Format(
        (IFormatProvider)null,
        "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
        ExpectedSequence);
    Console.WriteLine(consoleMessage);

    File.Delete(Constants.GenBankTempFileName);
}
/// <summary>
/// Parses the GenBank file configured under <c>Constants.SimpleGenBankPrimaryNode</c>
/// with the parser's <c>Alphabet</c> explicitly set to <c>Alphabets.Protein</c>, then
/// validates the first parsed sequence: alphabet, ID, locus and version metadata, and
/// the sequence text, each against the expected value read from the same XML node.
/// </summary>
public void GenBankParserValidateParseOneWithSpecificFormats()
{
    InitializeXmlVariables();

    // Replace the fixture values with the ones stored under SimpleGenBankPrimaryNode.
    FilePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.FilePathNode);
    AlphabetName = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.AlphabetNameNode);
    SeqId = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.SequenceIdNode);
    StrandTopology = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.StrandTopologyNode);
    StrandType = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.StrandTypeNode);
    Div = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.DivisionNode);
    Version = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.VersionNode);
    SequenceDate = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.DateNode);
    PrimaryId = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.PrimaryIdNode);
    ExpectedSequence = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.ExpectedSequenceNode);

    using (ISequenceParser parserObj = new GenBankParser(FilePath))
    {
        parserObj.Alphabet = Alphabets.Protein;

        // Take the first sequence exactly once. Parse() returns an IEnumerable; if it
        // is lazily evaluated, every ElementAt(0) call on it would re-run the parse,
        // which the previous version did four times.
        IEnumerable<ISequence> parsedSequences = parserObj.Parse();
        ISequence firstSequence = parsedSequences.ElementAt(0);

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
        Assert.AreEqual(SeqId, firstSequence.ID);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // Metadata that is tricky to parse and is not covered implicitly by the
        // formatting tests.
        GenBankMetadata metadata = (GenBankMetadata)firstSequence.Metadata["GenBank"];

        // The strand type is only compared when the locus actually specifies one.
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        // Topology is compared case-insensitively by upper-casing both sides.
        Assert.AreEqual(
            StrandTopology.ToUpper(CultureInfo.CurrentCulture),
            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
        Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
        Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // The sequence is enumerated symbol by symbol and each symbol is cast to a
        // char to rebuild the text that is compared with the expected sequence.
        string actualSequence = new string(firstSequence.Select(a => (char)a).ToArray());
        Assert.AreEqual(ExpectedSequence, actualSequence);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Sequence");
        Console.WriteLine(string.Format((IFormatProvider)null,
            "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
            ExpectedSequence));
    }
}