/// <summary>
/// Parses a GenBank file with the MBF <c>GenBankParser</c> and converts every
/// parsed sequence with <c>ConvertToBioPatMLSeq</c>.
/// </summary>
/// <param name="genbankFileURL">Path of the GenBank file to parse.</param>
/// <returns>A list holding one converted entry per parsed sequence, in parser order.</returns>
/// <exception cref="NotImplementedException">
/// Thrown when <c>IsOnline</c> is true; online reading is not supported.
/// </exception>
private SequenceList ParseSequencePath(string genbankFileURL)
{
    if (IsOnline)
    {
        throw new NotImplementedException
            ("online genbank reading is not supported in this version!");
    }

    // Create the parser first; always parse the input as a multi-sequence file.
    ISequenceParser gbParser = new GenBankParser();
    List<ISequence> mbfSequences = gbParser.Parse(genbankFileURL);

    SequenceList bioSeqList = new SequenceList();
    foreach (Sequence mbfseq in mbfSequences)
    {
        // Convert each sequence exactly once and keep the result.
        bioSeqList.Add(ConvertToBioPatMLSeq(mbfseq));
    }

    return bioSeqList;
}
// Connects to the prokaryote_schema MySQL database, collects the pending .gbff
// paths via extract_newbacterias(), waits for a key press, then parses every file
// and hands each parsed sequence to parse_to_database(). Progress and the final
// outcome are reported on the console; any exception is caught and printed.
private static void initiate_Updater()
{
    // 1-based index of the file currently being processed; also used in the
    // finally block to tell whether every file was handled.
    int i = 1;
    Console.WriteLine("Connecting to database prokaryote_schema...");
    try
    {
        // NOTE(review): credentials are hard-coded in source; they should be
        // moved to configuration / a secret store.
        conn = new MySqlConnection("Server=127.0.0.1;Database=prokaryote_schema;Uid=root;Pwd=Anitar@n@");
        conn.Open();
        Console.WriteLine("Connected to database prokaryote_schema\n");

        newbacteriaspath = new List<string>();
        extract_newbacterias(); // presumably fills newbacteriaspath - confirm against its definition
        Console.WriteLine("\nNumber of bacterias to be parsed to database: {0}\n", newbacteriaspath.Count);
        Console.WriteLine("Press any key to initiate the parsing");
        Console.ReadKey();
        Console.WriteLine("Loading newbacterias to database...");

        foreach (var bacteria in newbacteriaspath)
        {
            Console.WriteLine("Parsing .gbff file [{0}/{1}]", i, newbacteriaspath.Count);

            List<ISequence> sequences;
            ISequenceParser parser = new Bio.IO.GenBank.GenBankParser();
            using (parser.Open(bacteria))
            {
                // Materialize while the parser is still open.
                sequences = parser.Parse().ToList();
            }

            int j = 1;
            foreach (var sequence in sequences)
            {
                Console.WriteLine("\tParsingSequence [{0}/{1}]", j, sequences.Count);
                parse_to_database(sequence);
                j++;
            }

            Console.WriteLine();
            append_new_path(bacteria);
            i++;
            prokaryote_id = 0;
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("\nError occured: " + ex.Message);
    }
    finally
    {
        // newbacteriaspath is still unassigned when the connection attempt itself
        // failed; treat that as "nothing processed" instead of throwing from finally.
        int total = newbacteriaspath != null ? newbacteriaspath.Count : 0;
        if (total != 0 && i - 1 == total)
        {
            Console.WriteLine("\nDatabase Updated Successfully");
        }
        else
        {
            Console.WriteLine("\nDatabase is not Updated");
        }
    }
}
// Parses the .gbff file located at this.path and stores all of its sequences
// in this.sequences.
private void extract_sequences()
{
    ISequenceParser gbffParser = new Bio.IO.GenBank.GenBankParser();

    using (gbffParser.Open(this.path))
    {
        // The list is built while the parser is still open.
        var parsed = gbffParser.Parse();
        this.sequences = parsed.ToList();
    }
}
/// <summary>
/// Parses GenBank data from a text reader (for example a <c>StringReader</c>) with
/// the MBF <c>GenBankParser</c> and converts every parsed sequence with
/// <c>ConvertToBioPatMLSeq</c>.
/// </summary>
/// <param name="reader">Reader positioned at the GenBank text to parse.</param>
/// <returns>A list holding one converted entry per parsed sequence, in parser order.</returns>
private SequenceList ParseSequencePath(TextReader reader)
{
    // Create the parser first; always parse the input as multi-sequence data.
    ISequenceParser gbParser = new GenBankParser();
    List<ISequence> mbfSequences = gbParser.Parse(reader);

    SequenceList bioSeqList = new SequenceList();
    foreach (Sequence mbfseq in mbfSequences)
    {
        // Convert each sequence exactly once and keep the result.
        bioSeqList.Add(ConvertToBioPatMLSeq(mbfseq));
    }

    return bioSeqList;
}
// Parses the GenBank file at FilePath and checks the first sequence's alphabet,
// ID, locus/version metadata and sequence string against the expected values
// loaded by InitializeXmlVariables().
public void GenBankParserValidateParseFileName()
{
    InitializeXmlVariables();

    ISequenceParser genBankParser = new GenBankParser();
    IEnumerable<ISequence> parsedSequences = genBankParser.Parse(FilePath);
    ISequence first = parsedSequences.ElementAt(0);

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), first.Alphabet);
    Assert.AreEqual(SeqId, first.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // Metadata that is tricky to parse and is not covered implicitly by the
    // formatting tests.
    var genBankMetadata = (GenBankMetadata) first.Metadata["GenBank"];
    var locus = genBankMetadata.Locus;

    // The strand type is only compared when the file declares one.
    if (locus.Strand != SequenceStrandType.None)
    {
        Assert.AreEqual(StrandType, locus.Strand.ToString());
    }

    string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
    string actualTopology = locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedTopology, actualTopology);

    Assert.AreEqual(Div, locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), locus.Date);
    Assert.AreEqual(Version, genBankMetadata.Version.Version.ToString(null));
    Assert.AreEqual(PrimaryId, genBankMetadata.Version.GiNumber);
    ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // Finally compare the sequence string itself.
    Assert.AreEqual(ExpectedSequence, first.ConvertToString());
    ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the Sequence");
}
// Round-trips the GenBank file with multiple DBLINK lines: parses its first
// sequence, formats it into a temp file and asserts that CompareFiles() reports
// the temp file as matching the original input.
public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
{
    string tempFileName = Path.GetTempFileName();
    try
    {
        ISequenceParser parser1 = new GenBankParser();
        using (parser1.Open(_genBankFile_WithMultipleDBLines))
        {
            var orgSeq = parser1.Parse().First();

            ISequenceFormatter formatter = new GenBankFormatter();
            using (formatter.Open(tempFileName))
            {
                formatter.Format(orgSeq);
                formatter.Close();
            }
        }

        var same = CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);
        Assert.IsTrue(same);
    }
    finally
    {
        // Remove the temp file even when parsing, formatting or the comparison fails.
        File.Delete(tempFileName);
    }

    ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
}
// Checks feature look-ups on two GenBank files: the small single-sequence file
// and NC_001284.gbk from the GenBank data directory.
public void GenBankFeatures()
{
    // --- first file -------------------------------------------------------
    ISequence smallSequence = new GenBankParser()
        .Parse(_singleProteinSeqGenBankFilename)
        .FirstOrDefault();
    Assert.IsNotNull(smallSequence);

    GenBankMetadata metadata = smallSequence.Metadata["GenBank"] as GenBankMetadata;
    Assert.IsNotNull(metadata);

    List<CodingSequence> codingSequences = metadata.Features.CodingSequences;
    Assert.AreEqual(codingSequences.Count, 3);
    Assert.AreEqual(codingSequences[0].DatabaseCrossReference.Count, 1);
    Assert.AreEqual(codingSequences[0].GeneSymbol, string.Empty);
    Assert.AreEqual(metadata.Features.GetFeatures("source").Count, 1);
    Assert.IsFalse(codingSequences[0].Pseudo);

    // Range queries.
    Assert.AreEqual(metadata.GetFeatures(1, 109).Count, 2);
    Assert.AreEqual(metadata.GetFeatures(1, 10).Count, 2);
    Assert.AreEqual(metadata.GetFeatures(10, 100).Count, 2);
    Assert.AreEqual(metadata.GetFeatures(120, 150).Count, 2);
    Assert.AreEqual(metadata.GetCitationsReferredInFeatures().Count, 0);

    // --- second file ------------------------------------------------------
    ISequence genomeSequence = new GenBankParser()
        .Parse(_genBankDataPath + @"\NC_001284.gbk")
        .FirstOrDefault();
    Assert.IsNotNull(genomeSequence);

    metadata = genomeSequence.Metadata["GenBank"] as GenBankMetadata;
    Assert.IsNotNull(metadata);

    // Per-kind feature counts.
    Assert.AreEqual(metadata.Features.All.Count, 743);
    Assert.AreEqual(metadata.Features.CodingSequences.Count, 117);
    Assert.AreEqual(metadata.Features.Exons.Count, 32);
    Assert.AreEqual(metadata.Features.Introns.Count, 22);
    Assert.AreEqual(metadata.Features.Genes.Count, 60);
    Assert.AreEqual(metadata.Features.MiscFeatures.Count, 455);
    Assert.AreEqual(metadata.Features.Promoters.Count, 17);
    Assert.AreEqual(metadata.Features.TransferRNAs.Count, 21);

    // The coding-sequence count must agree across the two look-up paths.
    Assert.AreEqual(
        metadata.Features.All.FindAll(feature => feature.Key.Equals(StandardFeatureKeys.CodingSequence)).Count,
        117);
    Assert.AreEqual(metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count, 117);

    // Copy the translated sequence into a byte buffer and decode it as ASCII so
    // it can be compared with the Translation text of the same feature.
    ISequence translated = metadata.Features.CodingSequences[0].GetTranslation();
    byte[] translatedBytes = new byte[translated.Count];
    for (int index = 0; index < translated.Count; index++)
    {
        translatedBytes[index] = translated[index];
    }
    string translatedText = Encoding.ASCII.GetString(translatedBytes);

    Assert.AreEqual(
        metadata.Features.CodingSequences[0].Translation.Trim('"'),
        translatedText.Trim('"'));
    Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
}
// Parses every *.gbk file in the GenBank data directory, formats the parsed
// sequences into the temp GenBank file, reads that file back and deletes it.
public void TestGenBankForManyFiles()
{
    FileInfo[] gbkFiles = new DirectoryInfo(_genBankDataPath).GetFiles("*.gbk");

    foreach (FileInfo gbkFile in gbkFiles)
    {
        ApplicationLog.WriteLine("Parsing file {0}...{1}", gbkFile.FullName, Environment.NewLine);

        IEnumerable<ISequence> parsedSequences = new GenBankParser().Parse(gbkFile.FullName);

        ISequenceFormatter formatter = new GenBankFormatter();
        using (formatter.Open(TempGenBankFileName))
        {
            // The list is built only once the formatter is open.
            var genBankFormatter = formatter as GenBankFormatter;
            genBankFormatter.Format(parsedSequences.ToList());
        }

        // Read the formatted output back; the content itself is not inspected.
        using (var outputReader = new StreamReader(TempGenBankFileName))
        {
            outputReader.ReadToEnd();
        }

        File.Delete(TempGenBankFileName);
    }
}
// Parses the single-DNA-sequence GenBank file with the parser alphabet preset
// to DNA, then formats the sequence and compares the output (ignoring spaces)
// with the expected text.
public void TestGenBankWhenUserSetsDnaAlphabet()
{
    // Parse with the alphabet preset to DNA.
    ISequenceParser dnaParser = new GenBankParser();
    dnaParser.Alphabet = Alphabets.DNA;
    ISequence parsed = dnaParser.Parse(_singleDnaSeqGenBankFilename).FirstOrDefault();
    Assert.AreEqual(Alphabets.DNA, parsed.Alphabet);

    // Format to the temp file, read the whole output back, then clean up.
    ISequenceFormatter genBankFormatter = new GenBankFormatter();
    genBankFormatter.Format(parsed, TempGenBankFileName);
    string actual = File.ReadAllText(TempGenBankFileName);
    File.Delete(TempGenBankFileName);

    // Compare with spaces removed and CRLF in the expected text mapped to the
    // platform newline.
    string expectedNormalized = _singleDnaSeqGenBankFileExpectedOutput
        .Replace(" ", "")
        .Replace("\r\n", Environment.NewLine);
    string actualNormalized = actual.Replace(" ", "");
    Assert.AreEqual(expectedNormalized, actualNormalized);
}
// Parses the first sequence of the single-sequence GenBank file and verifies its
// alphabet, ID, locus/version metadata, reference and feature counts and the raw
// sequence text; it then formats the sequence to the temp GenBank file and
// compares the output (ignoring spaces) with the expected text.
public void TestGenBankWhenParsingOne()
{
    // parse
    ISequenceParser parser = new GenBankParser();
    ISequence seq = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

    // test the non-metadata properties
    Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
    Assert.AreEqual("SCU49845", seq.ID);

    // test the metadata that is tricky to parse, and will not be tested implicitly by
    // testing the formatting
    GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
    Assert.AreEqual(metadata.Locus.Strand, SequenceStrandType.None);
    Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLower(CultureInfo.CurrentCulture));
    Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse("21-JUN-1999", (IFormatProvider)null), metadata.Locus.Date);
    Assert.AreEqual("1", metadata.Version.Version);
    Assert.AreEqual("1293613", metadata.Version.GiNumber);

    // test that we're correctly putting all types of metadata in the right places
    Assert.AreEqual(1, seq.Metadata.Count);
    IList<CitationReference> referenceList = metadata.References;
    Assert.AreEqual(3, referenceList.Count);
    IList<FeatureItem> featureList = metadata.Features.All;
    Assert.AreEqual(6, featureList.Count);
    Assert.AreEqual(4, featureList[0].Qualifiers.Count);
    Assert.AreEqual(5, featureList[1].Qualifiers.Count);
    Assert.AreEqual(1, featureList[2].Qualifiers.Count);

    // test the sequence string
    // The verbatim literal below is compared character-for-character with the
    // parsed sequence, so its content (including line breaks) must not be altered.
    string expected =
@"gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattg
gcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattt
taagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc";
    Assert.AreEqual(expected, new string(seq.Select(a => (char)a).ToArray()));

    // format
    ISequenceFormatter formatter = new GenBankFormatter();
    formatter.Format(seq, TempGenBankFileName);
    string actual = string.Empty;
    using (StreamReader reader = new StreamReader(TempGenBankFileName))
    {
        actual = reader.ReadToEnd();
    }
    // The temp file is removed before the comparison below runs.
    File.Delete(TempGenBankFileName);

    // test the formatting
    // Spaces are stripped from both sides and CRLF in the expected text is mapped
    // to the platform newline before comparing.
    Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine), actual.Replace(" ", ""));
}
// Verifies that the "origin shifted 2" GenBank file yields at least one sequence.
public void TestGenBankParseOriginShifted2()
{
    var firstSequence = new GenBankParser()
        .Parse(_genBankFile_ParseOriginShifted2)
        .FirstOrDefault();

    Assert.IsNotNull(firstSequence);
}
// Verifies that the empty-organism-classification GenBank file yields at least
// one sequence.
public void TestGenBankEmptyOrganismClassification()
{
    var firstSequence = new GenBankParser()
        .Parse(_genBankFile_EmptyOrganismClassificationTest)
        .FirstOrDefault();

    Assert.IsNotNull(firstSequence);
}
/// <summary>
/// Checks how many features GenBank metadata returns for two position ranges
/// and compares the counts with the values stored in the XML configuration.
/// </summary>
/// <param name="nodeName">XML node holding the file path, range bounds and expected counts.</param>
/// <param name="methodName">
/// When equal to "Accession" (case-insensitive) the accession-based
/// <c>GetFeatures</c> overload is used; otherwise the range-only overload is used.
/// </param>
private void ValidateGetFeatures(string nodeName, string methodName)
{
    // Expected values from the XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedFirstRangeStartPoint = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstRangeStartPoint);
    string expectedSecondRangeStartPoint = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondRangeStartPoint);
    string expectedFirstRangeEndPoint = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstRangeEndPoint);
    string expectedSecondRangeEndPoint = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondRangeEndPoint);
    string expectedCountWithinSecondRange = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FeaturesWithinSecondRange);
    string expectedCountWithinFirstRange = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FeaturesWithinFirstRange);

    // Parse the file and pick up the metadata of its first sequence.
    ISequenceParser parserObj = new GenBankParser();
    IEnumerable<ISequence> parsed = parserObj.Parse(filePath);
    var metadata = parsed.ElementAt(0).Metadata[Constants.GenBank] as GenBankMetadata;
    List<CodingSequence> cdsList = metadata.Features.CodingSequences;
    string accessionNumber = cdsList[0].Location.Accession;

    bool byAccession = string.Compare(
        methodName, "Accession", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase) == 0;

    if (byAccession)
    {
        // Accession-qualified look-up, first range then second range.
        int firstStart = Convert.ToInt32(expectedFirstRangeStartPoint, null);
        int firstEnd = Convert.ToInt32(expectedFirstRangeEndPoint, null);
        string firstCount = metadata.GetFeatures(accessionNumber, firstStart, firstEnd)
            .Count.ToString((IFormatProvider) null);
        Assert.AreEqual(firstCount, expectedCountWithinFirstRange);

        int secondStart = Convert.ToInt32(expectedSecondRangeStartPoint, null);
        int secondEnd = Convert.ToInt32(expectedSecondRangeEndPoint, null);
        string secondCount = metadata.GetFeatures(accessionNumber, secondStart, secondEnd)
            .Count.ToString((IFormatProvider) null);
        Assert.AreEqual(secondCount, expectedCountWithinSecondRange);
    }
    else
    {
        // Range-only look-up, first range then second range.
        int firstStart = Convert.ToInt32(expectedFirstRangeStartPoint, null);
        int firstEnd = Convert.ToInt32(expectedFirstRangeEndPoint, null);
        string firstCount = metadata.GetFeatures(firstStart, firstEnd)
            .Count.ToString((IFormatProvider) null);
        Assert.AreEqual(firstCount, expectedCountWithinFirstRange);

        int secondStart = Convert.ToInt32(expectedSecondRangeStartPoint, null);
        int secondEnd = Convert.ToInt32(expectedSecondRangeEndPoint, null);
        string secondCount = metadata.GetFeatures(secondStart, secondEnd)
            .Count.ToString((IFormatProvider) null);
        Assert.AreEqual(secondCount, expectedCountWithinSecondRange);
    }
}
// Builds a Sequence from the expected sequence text plus the GenBank metadata and
// ID of the first sequence parsed from FilePath, writes it through a formatter
// opened on a temp file, parses that file back and validates alphabet, ID,
// locus/version metadata and the sequence string.
public void GenBankFormatterValidateWriteUsingStream()
{
    InitializeXmlVariables();

    // Create a Sequence with all attributes.
    // Parse and update the properties instead of parsing entire file.
    ISequenceParser parser1 = new GenBankParser();
    IEnumerable<ISequence> seqList1 = parser1.Parse(FilePath);

    string tempFileName = Path.GetTempFileName();
    try
    {
        GenBankMetadata metadata = null;
        ISequence seq = null;

        string expectedUpdatedSequence =
            ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
        var orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
        orgSeq.Metadata.Add("GenBank", seqList1.ElementAt(0).Metadata["GenBank"]);
        orgSeq.ID = seqList1.ElementAt(0).ID;

        ISequenceFormatter formatter = new GenBankFormatter();
        using (formatter.Open(tempFileName))
        {
            formatter.Format(orgSeq);
        }

        // Read the freshly written file back.
        var parserObj = new GenBankParser();
        IEnumerable<ISequence> seqList = parserObj.Parse(tempFileName);
        seq = seqList.ElementAt(0);
        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
        Assert.AreEqual(SeqId, seq.ID);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // test the metadata that is tricky to parse, and will not be tested implicitly by
        // testing the formatting
        metadata = (GenBankMetadata) seq.Metadata["GenBank"];
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                        metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
        Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
        Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // test the sequence string
        Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char) a).ToArray()));
        ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
    }
    finally
    {
        // Remove the temp file even when an assertion above fails.
        File.Delete(tempFileName);
    }
}
// Reloads the expected values from the simple GenBank primary XML node, parses
// FilePath with the parser alphabet preset to Protein and validates the first
// sequence's alphabet, ID, locus/version metadata and sequence string.
public void GenBankParserValidateParseOneWithSpecificFormats()
{
    InitializeXmlVariables();

    // Expected values come from the simple GenBank primary node.
    var node = Constants.SimpleGenBankPrimaryNode;
    FilePath = utilityObj.xmlUtil.GetTextValue(node, Constants.FilePathNode);
    AlphabetName = utilityObj.xmlUtil.GetTextValue(node, Constants.AlphabetNameNode);
    SeqId = utilityObj.xmlUtil.GetTextValue(node, Constants.SequenceIdNode);
    StrandTopology = utilityObj.xmlUtil.GetTextValue(node, Constants.StrandTopologyNode);
    StrandType = utilityObj.xmlUtil.GetTextValue(node, Constants.StrandTypeNode);
    Div = utilityObj.xmlUtil.GetTextValue(node, Constants.DivisionNode);
    Version = utilityObj.xmlUtil.GetTextValue(node, Constants.VersionNode);
    SequenceDate = utilityObj.xmlUtil.GetTextValue(node, Constants.DateNode);
    PrimaryId = utilityObj.xmlUtil.GetTextValue(node, Constants.PrimaryIdNode);
    ExpectedSequence = utilityObj.xmlUtil.GetTextValue(node, Constants.ExpectedSequenceNode);

    // Parse with the alphabet preset to Protein.
    ISequenceParser parserObj = new GenBankParser();
    parserObj.Alphabet = Alphabets.Protein;
    IEnumerable<ISequence> parsed = parserObj.Parse(FilePath);

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), parsed.ElementAt(0).Alphabet);
    Assert.AreEqual(SeqId, parsed.ElementAt(0).ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // Metadata that is tricky to parse and is not covered implicitly by the
    // formatting tests.
    var genBankMetadata = (GenBankMetadata) parsed.ElementAt(0).Metadata["GenBank"];

    // The strand type is only compared when the file declares one.
    if (genBankMetadata.Locus.Strand != SequenceStrandType.None)
    {
        Assert.AreEqual(StrandType, genBankMetadata.Locus.Strand.ToString());
    }

    string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
    string actualTopology =
        genBankMetadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedTopology, actualTopology);

    Assert.AreEqual(Div, genBankMetadata.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankMetadata.Locus.Date);
    Assert.AreEqual(Version, genBankMetadata.Version.Version.ToString(null));
    Assert.AreEqual(PrimaryId, genBankMetadata.Version.GiNumber);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // Finally compare the sequence string itself.
    string actualSequence = new string(parsed.ElementAt(0).Select(a => (char) a).ToArray());
    Assert.AreEqual(ExpectedSequence, actualSequence);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Sequence");
}
/// <summary>
/// Validates the GenBank formatter for general test cases: builds a sequence from
/// the expected sequence text plus the metadata and ID of the first parsed
/// sequence, formats it to a temp file, parses that file back and validates the
/// result.
/// </summary>
/// <param name="seqList1">Parsed sequences; only the first one is used, for its GenBank metadata and ID.</param>
private static void ValidateWriteGeneralTestCases(IEnumerable<ISequence> seqList1)
{
    // Create a Sequence with all attributes.
    // Parse and update the properties instead of parsing entire file.
    string expectedUpdatedSequence =
        ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
    Sequence orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
    orgSeq.Metadata.Add("GenBank", (GenBankMetadata)seqList1.ElementAt(0).Metadata["GenBank"]);
    orgSeq.ID = seqList1.ElementAt(0).ID;

    string tempFileName = System.IO.Path.GetTempFileName();
    try
    {
        ISequenceFormatter formatter = new GenBankFormatter();
        formatter.Format(orgSeq, tempFileName);

        // parse
        GenBankParser parserObj = new GenBankParser();
        IEnumerable<ISequence> seqList = parserObj.Parse(tempFileName);
        ISequence seq = seqList.ElementAt(0);
        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
        Assert.AreEqual(SeqId, seq.ID);
        ApplicationLog.WriteLine(
            "GenBank Formatter P1: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // test the metadata that is tricky to parse, and will not be tested implicitly by
        // testing the formatting
        GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        // NOTE(review): the topology check is guarded by the strand type, exactly as
        // before; confirm whether it was meant to test StrandTopology instead.
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        }

        if (metadata.Locus.DivisionCode != SequenceDivisionCode.None)
        {
            Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        }

        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);

        // Version and GI number are skipped when the alphabet name is "rna".
        if (0 != string.Compare(AlphabetName, "rna", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase))
        {
            Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
            ApplicationLog.WriteLine(
                "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");
        }
        else
        {
            ApplicationLog.WriteLine(
                "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date Properties");
        }

        // Compare the sequences with line breaks and spaces removed, upper-cased.
        string truncatedExpectedSequence =
            ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "").ToUpper(
                CultureInfo.CurrentCulture);
        string truncatedActualSequence =
            new string(seq.Select(a => (char)a).ToArray()).Replace("\r", "").Replace("\n", "").Replace(" ", "").ToUpper(
                CultureInfo.CurrentCulture);

        // test the sequence string
        Assert.AreEqual(truncatedExpectedSequence, truncatedActualSequence);
        ApplicationLog.WriteLine(
            "GenBank Formatter P1: Successfully validated the Sequence");
    }
    finally
    {
        // Remove the temp file even when an assertion above fails.
        File.Delete(tempFileName);
    }
}
// Verifies that the locus-token-parser GenBank file yields at least one sequence.
public void TestGenBankLocusTokenParser()
{
    var firstSequence = new GenBankParser()
        .Parse(_genBankFile_LocusTokenParserTest)
        .FirstOrDefault();

    Assert.IsNotNull(firstSequence);
}
/// <summary>
/// Checks the number of citations referenced by the first feature item of a
/// GenBank file against the count stored in the XML configuration.
/// </summary>
/// <param name="nodeName">XML node holding the file path and the expected citation count.</param>
private void ValidateCitationReferencedUsingFeatureItem(string nodeName)
{
    // Expected values from the XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedCitationReferenced =
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.citationReferencedCount);

    // Parse the file and take the metadata of its first sequence.
    ISequenceParser parserObj = new GenBankParser();
    IEnumerable<ISequence> parsed = parserObj.Parse(filePath);
    var metadata = parsed.ElementAt(0).Metadata[Constants.GenBank] as GenBankMetadata;
    IList<FeatureItem> allFeatures = metadata.Features.All;

    // Citations referenced by the first feature.
    List<CitationReference> citations = metadata.GetCitationsReferredInFeature(allFeatures[0]);
    string actualCount = citations.Count.ToString((IFormatProvider) null);

    Assert.AreEqual(actualCount, expectedCitationReferenced);
}
// Verifies that the empty-version GenBank file yields at least one sequence.
public void TestGenBankParseVersionEmpty()
{
    var firstSequence = new GenBankParser()
        .Parse(_genBankFile_ParseVersionEmpty)
        .FirstOrDefault();

    Assert.IsNotNull(firstSequence);
}
/// <summary>
/// Validates the qualifiers of the first CDS feature of a GenBank file against
/// the values stored in the XML configuration.
/// </summary>
/// <param name="nodeName">XML node holding the file path and the expected qualifier values.</param>
/// <param name="methodName">
/// When equal to "DNA" or "RNA" (case-insensitive) the product and codon-start
/// qualifiers are validated as well; otherwise they are skipped.
/// </param>
private void ValidateCDSQualifiers(string nodeName, string methodName)
{
    // Expected values from the XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedCDSProduct = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CDSProductQualifier);
    string expectedCDSException = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CDSException);
    string expectedCDSCodonStart = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CDSCodonStart);
    string expectedCDSLabel = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CDSLabel);
    string expectedCDSDBReference = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CDSDBReference);
    string expectedGeneSymbol = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GeneSymbol);

    // Parse the file and fetch the CDS features of its first sequence.
    ISequenceParser parserObj = new GenBankParser();
    IEnumerable<ISequence> parsed = parserObj.Parse(filePath);
    var metadata = parsed.ElementAt(0).Metadata[Constants.GenBank] as GenBankMetadata;

    List<CodingSequence> codingSequences = metadata.Features.CodingSequences;
    List<string> codonStarts = codingSequences[0].CodonStart;
    List<string> products = codingSequences[0].Product;
    List<string> dbReferences = codingSequences[0].DatabaseCrossReference;

    bool isDna = string.Compare(methodName, "DNA", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase) == 0;
    bool isRna = !isDna
        && string.Compare(methodName, "RNA", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase) == 0;

    Assert.AreEqual(codingSequences[0].Label, expectedCDSLabel);
    Assert.AreEqual(codingSequences[0].Exception.ToString(null), expectedCDSException);

    // Product and codon start are only checked for the DNA / RNA cases.
    if (isDna || isRna)
    {
        Assert.AreEqual(products[0], expectedCDSProduct);
        Assert.AreEqual(codonStarts[0], expectedCDSCodonStart);
    }

    Assert.IsTrue(string.IsNullOrEmpty(codingSequences[0].Allele));
    Assert.IsFalse(string.IsNullOrEmpty(codingSequences[0].Citation.ToString()));
    Assert.AreEqual(dbReferences[0], expectedCDSDBReference);
    Assert.AreEqual(codingSequences[0].GeneSymbol, expectedGeneSymbol);
}
// Expects calling Parse() on a parser that has not been opened to throw; the
// test fails when no exception is raised.
public void TestGenBankFailureWhenParsingEmpty()
{
    bool exceptionThrown = false;

    try
    {
        ISequenceParser unopenedParser = new GenBankParser();
        unopenedParser.Parse();
    }
    catch (Exception)
    {
        // Any exception counts as the expected outcome.
        exceptionThrown = true;
    }

    if (!exceptionThrown)
    {
        Assert.Fail("Failed to throw exception for calling ParseOne on reader containing empty string.");
    }
}
/// <summary>
/// Validates the sub-sequence and location data of the first feature of a
/// GenBank file against the values stored in the XML configuration.
/// </summary>
/// <param name="nodeName">XML node holding the file path, expected sub-sequence and start/end positions.</param>
private void ValidateSequenceFeature(string nodeName)
{
    // Expected values from the XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string subSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSubSequence);
    string subSequenceStart = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceStart);
    string subSequenceEnd = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceEnd);

    // Parse the file and take the metadata of its first sequence.
    ISequenceParser parserObj = new GenBankParser();
    IEnumerable<ISequence> parsed = parserObj.Parse(filePath);
    var metadata = parsed.ElementAt(0).Metadata[Constants.GenBank] as GenBankMetadata;

    // Extract the part of the first sequence covered by the first feature and
    // turn it into text.
    ISequence firstFeatureSeq = metadata.Features.All[0].GetSubSequence(parsed.ElementAt(0));
    char[] subSequenceChars = firstFeatureSeq.Select(a => (char) a).ToArray();
    var sequenceString = new string(subSequenceChars);

    // Validate the sub-sequence and the feature location.
    Assert.AreEqual(sequenceString, subSequence);

    string actualStart = metadata.Features.All[0].Location.LocationStart.ToString((IFormatProvider) null);
    Assert.AreEqual(actualStart, subSequenceStart);

    string actualEnd = metadata.Features.All[0].Location.LocationEnd.ToString((IFormatProvider) null);
    Assert.AreEqual(actualEnd, subSequenceEnd);

    Assert.IsNull(metadata.Features.All[0].Location.Accession);
    Assert.AreEqual(metadata.Features.All[0].Location.StartData, subSequenceStart);
    Assert.AreEqual(metadata.Features.All[0].Location.EndData, subSequenceEnd);
}
/// <summary>
/// Parses the multi-sequence GenBank file and checks how many sequences come back
/// and how long each one is. Detailed content checks live in
/// TestGenBankWhenParsingOne.
/// </summary>
public void TestGenBankWhenParsingMultiple()
{
    // parse
    ISequenceParser parser = new GenBankParser();

    // Materialize once. The parse result is an IEnumerable that may be lazily
    // evaluated, so Count() + ElementAt(0) + ElementAt(1) on it could walk the
    // file three separate times.
    List<ISequence> seqList = parser.Parse(_multipleSeqGenBankFilename).ToList();

    // Just check the number of items returned and that they're not empty.
    Assert.AreEqual(2, seqList.Count);
    Assert.AreEqual(105, seqList[0].Count);
    Assert.AreEqual(5028, seqList[1].Count);
}
/// <summary>
/// Reads every typed feature collection exposed by the GenBank metadata of the
/// first sequence in the DNA standard-features file, then checks the size of each
/// collection against the expected counts stored in the XML test-data node.
/// </summary>
public void ValidateGenBankFeatureProperties()
{
    // Expected values from the XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.FilePathNode);
    string expectedMessengerRnaCount = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.mRNACount);
    string expectedExonCount = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.ExonCount);
    string expectedIntronCount = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.IntronCount);
    string expectedCdsCount = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.CDSCount);
    string expectedTotalCount = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.GenBankFeaturesCount);
    string expectedGeneCount = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.GeneCount);
    string expectedMiscFeatureCount = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.MiscFeatureCount);
    string expectedRibosomalRnaCount = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.rRNACount);
    string expectedTransferRnaCount = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.tRNACount);
    string emptyCountText = utilityObj.xmlUtil.GetTextValue(
        Constants.DNAStandardFeaturesKeyNode, Constants.emptyCount);

    ISequenceParser genBankParser = new GenBankParser();
    IEnumerable<ISequence> parsed = genBankParser.Parse(filePath);

    // Touch every typed property of the feature set up front, before any
    // assertion runs, so that all of them are exercised.
    var genBankData = (GenBankMetadata) parsed.ElementAt(0).Metadata[Constants.GenBank];
    var features = genBankData.Features;

    List<FeatureItem> everyFeature = features.All;
    List<Minus10Signal> minus10Signals = features.Minus10Signals;
    List<Minus35Signal> minus35Signals = features.Minus35Signals;
    List<ThreePrimeUtr> threePrimeUtrs = features.ThreePrimeUTRs;
    List<FivePrimeUtr> fivePrimeUtrs = features.FivePrimeUTRs;
    List<Attenuator> attenuators = features.Attenuators;
    List<CaatSignal> caatSignals = features.CAATSignals;
    List<CodingSequence> codingSequences = features.CodingSequences;
    List<DisplacementLoop> displacementLoops = features.DisplacementLoops;
    List<Enhancer> enhancers = features.Enhancers;
    List<Exon> exons = features.Exons;
    List<GcSingal> gcSignals = features.GCSignals;
    List<Gene> genes = features.Genes;
    List<InterveningDna> interveningDnas = features.InterveningDNAs;
    List<Intron> introns = features.Introns;
    List<LongTerminalRepeat> longTerminalRepeats = features.LongTerminalRepeats;
    List<MaturePeptide> maturePeptides = features.MaturePeptides;
    List<MiscBinding> miscBindings = features.MiscBindings;
    List<MiscDifference> miscDifferences = features.MiscDifferences;
    List<MiscFeature> miscFeatureItems = features.MiscFeatures;
    List<MiscRecombination> miscRecombinations = features.MiscRecombinations;
    List<MiscRna> miscRnas = features.MiscRNAs;
    List<MiscSignal> miscSignals = features.MiscSignals;
    List<MiscStructure> miscStructures = features.MiscStructures;
    List<ModifiedBase> modifiedBases = features.ModifiedBases;
    List<MessengerRna> messengerRnas = features.MessengerRNAs;
    List<NonCodingRna> nonCodingRnas = features.NonCodingRNAs;
    List<OperonRegion> operonRegions = features.OperonRegions;
    List<PolyASignal> polyASignals = features.PolyASignals;
    List<PolyASite> polyASites = features.PolyASites;
    List<PrecursorRna> precursorRnas = features.PrecursorRNAs;
    List<ProteinBindingSite> proteinBindingSiteItems = features.ProteinBindingSites;
    List<RibosomeBindingSite> ribosomeBindingSites = features.RibosomeBindingSites;
    List<ReplicationOrigin> replicationOrigins = features.ReplicationOrigins;
    List<RepeatRegion> repeatRegions = features.RepeatRegions;
    List<RibosomalRna> ribosomalRnas = features.RibosomalRNAs;
    List<SignalPeptide> signalPeptides = features.SignalPeptides;
    List<StemLoop> stemLoops = features.StemLoops;
    List<TataSignal> tataSignalItems = features.TATASignals;
    List<Terminator> terminators = features.Terminators;
    List<TransferMessengerRna> transferMessengerRnas = features.TransferMessengerRNAs;
    List<TransitPeptide> transitPeptides = features.TransitPeptides;
    List<TransferRna> transferRnas = features.TransferRNAs;
    List<UnsureSequenceRegion> unsureRegions = features.UnsureSequenceRegions;
    List<Variation> variationItems = features.Variations;

    // The "empty" count is compared against many collections; convert it once.
    int expectedEmpty = Convert.ToInt32(emptyCountText, null);

    // Validate GenBank Features.
    Assert.AreEqual(minus10Signals.Count, expectedEmpty);
    Assert.AreEqual(minus35Signals.Count, expectedEmpty);
    Assert.AreEqual(threePrimeUtrs.Count, expectedEmpty);
    Assert.AreEqual(fivePrimeUtrs.Count, expectedEmpty);
    Assert.AreEqual(caatSignals.Count, expectedEmpty);
    Assert.AreEqual(attenuators.Count, expectedEmpty);
    Assert.AreEqual(displacementLoops.Count, expectedEmpty);
    Assert.AreEqual(enhancers.Count, expectedEmpty);
    Assert.AreEqual(gcSignals.Count, expectedEmpty);
    Assert.AreEqual(genes.Count.ToString((IFormatProvider) null), expectedGeneCount);
    Assert.AreEqual(interveningDnas.Count, expectedEmpty);
    Assert.AreEqual(longTerminalRepeats.Count, expectedEmpty);
    Assert.AreEqual(maturePeptides.Count, expectedEmpty);
    Assert.AreEqual(miscBindings.Count, expectedEmpty);
    Assert.AreEqual(miscDifferences.Count, expectedEmpty);
    Assert.AreEqual(miscFeatureItems.Count.ToString((IFormatProvider) null), expectedMiscFeatureCount);
    Assert.AreEqual(miscRecombinations.Count, expectedEmpty);
    Assert.AreEqual(miscSignals.Count, expectedEmpty);
    Assert.AreEqual(modifiedBases.Count, expectedEmpty);
    Assert.AreEqual(miscRnas.Count, expectedEmpty);
    Assert.AreEqual(miscStructures.Count, expectedEmpty);
    Assert.AreEqual(messengerRnas.Count.ToString((IFormatProvider) null), expectedMessengerRnaCount);
    Assert.AreEqual(nonCodingRnas.Count, expectedEmpty);
    Assert.AreEqual(operonRegions.Count, expectedEmpty);
    Assert.AreEqual(polyASignals.Count, expectedEmpty);
    Assert.AreEqual(polyASites.Count, expectedEmpty);
    Assert.AreEqual(precursorRnas.Count, expectedEmpty);
    Assert.AreEqual(proteinBindingSiteItems.Count, expectedEmpty);
    Assert.AreEqual(ribosomeBindingSites.Count, expectedEmpty);
    Assert.AreEqual(replicationOrigins.Count, expectedEmpty);
    Assert.AreEqual(ribosomalRnas.Count.ToString((IFormatProvider) null), expectedRibosomalRnaCount);
    Assert.AreEqual(signalPeptides.Count, expectedEmpty);
    Assert.AreEqual(stemLoops.Count, expectedEmpty);
    Assert.AreEqual(tataSignalItems.Count, expectedEmpty);
    Assert.AreEqual(repeatRegions.Count, expectedEmpty);
    Assert.AreEqual(terminators.Count, expectedEmpty);
    Assert.AreEqual(transferMessengerRnas.Count, expectedEmpty);
    Assert.AreEqual(variationItems.Count, expectedEmpty);
    Assert.AreEqual(transferRnas.Count.ToString((IFormatProvider) null), expectedTransferRnaCount);
    Assert.AreEqual(transitPeptides.Count, expectedEmpty);
    Assert.AreEqual(unsureRegions.Count, expectedEmpty);
    Assert.AreEqual(stemLoops.Count, expectedEmpty);

    // Collections expected to be populated.
    Assert.AreEqual(everyFeature.Count, Convert.ToInt32(expectedTotalCount, null));
    Assert.AreEqual(codingSequences.Count, Convert.ToInt32(expectedCdsCount, null));
    Assert.AreEqual(exons.Count, Convert.ToInt32(expectedExonCount, null));
    Assert.AreEqual(introns.Count, Convert.ToInt32(expectedIntronCount, null));
}
/// <summary>
/// Forces the parser's alphabet to Protein, then reads the first sequence of the
/// single-DNA-sequence GenBank file and expects an
/// <see cref="InvalidDataException"/>.
/// </summary>
public void TestGenBankWhenUserSetsIncorrectAlphabet()
{
    ISequenceParser proteinOnlyParser = new GenBankParser();
    proteinOnlyParser.Alphabet = Alphabets.Protein;

    try
    {
        // Fetching the first element is what makes the parse result get read.
        proteinOnlyParser.Parse(_singleDnaSeqGenBankFilename).ElementAt(0);
    }
    catch (InvalidDataException)
    {
        // Expected: the alphabet mismatch was rejected.
        return;
    }

    Assert.Fail("Failed to throw exception for trying to create sequence using incorrect alphabet.");
}
/// <summary>
/// Extracts the sub-sequence of the first feature of the first parsed GenBank
/// sequence, supplying a dictionary of reference sequences to GetSubSequence, and
/// compares the result and its location data with the expected values from the
/// XML test-data node.
/// </summary>
public void ValidateSequenceFeatureUsingReferencedSequence()
{
    // Get Values from XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(
        Constants.GenBankFileSubSequenceNode, Constants.FilePathNode);
    string subSequence = utilityObj.xmlUtil.GetTextValue(
        Constants.GenBankFileSubSequenceNode, Constants.ExpectedSubSequence);
    string subSequenceStart = utilityObj.xmlUtil.GetTextValue(
        Constants.GenBankFileSubSequenceNode, Constants.SequenceStart);
    string subSequenceEnd = utilityObj.xmlUtil.GetTextValue(
        Constants.GenBankFileSubSequenceNode, Constants.SequenceEnd);
    string referenceSeq = utilityObj.xmlUtil.GetTextValue(
        Constants.GenBankFileSubSequenceNode, Constants.referenceSeq);

    // Parse a GenBank file.
    var refSequence = new Sequence(Alphabets.RNA, referenceSeq);
    var parserObj = new GenBankParser();
    ISequence sequence = parserObj.Parse(filePath).FirstOrDefault();

    // FirstOrDefault returns null for an empty result and 'as' returns null on a
    // type mismatch; check both explicitly so a failure is reported with a clear
    // message instead of a NullReferenceException.
    Assert.IsNotNull(sequence, "The GenBank file did not yield any sequence.");
    var metadata = sequence.Metadata[Constants.GenBank] as GenBankMetadata;
    Assert.IsNotNull(metadata, "GenBank metadata was not found on the parsed sequence.");

    // Get Subsequence feature, start and end positions.
    var referenceSequences = new Dictionary<string, ISequence>
    {
        { Constants.Reference, refSequence }
    };
    var firstFeature = metadata.Features.All[0];
    ISequence firstFeatureSeq = firstFeature.GetSubSequence(sequence, referenceSequences);
    var sequenceString = new string(firstFeatureSeq.Select(a => (char) a).ToArray());

    // Validate SubSequence.
    Assert.AreEqual(sequenceString, subSequence);
    Assert.AreEqual(firstFeature.Location.LocationStart.ToString((IFormatProvider) null), subSequenceStart);
    Assert.AreEqual(firstFeature.Location.LocationEnd.ToString((IFormatProvider) null), subSequenceEnd);
    Assert.IsNull(firstFeature.Location.Accession);
    Assert.AreEqual(firstFeature.Location.StartData, subSequenceStart);
    Assert.AreEqual(firstFeature.Location.EndData, subSequenceEnd);

    // Log to VSTest GUI
    ApplicationLog.WriteLine(string.Format(null,
        "GenBank Features BVT: Successfully validated the Subsequence feature '{0}'",
        sequenceString));
    ApplicationLog.WriteLine(string.Format(null,
        "GenBank Features BVT: Successfully validated the start of subsequence'{0}'",
        firstFeature.Location.LocationStart.ToString((IFormatProvider) null)));
}
/// <summary>
/// Checks that the GenBank parser and the GenBank formatter report the name,
/// description and supported file types defined in the resource file.
/// </summary>
public void GenBankProperties()
{
    // Name and file extensions are expected to be the same for both components.
    var expectedName = Resource.GENBANK_NAME;
    var expectedFileTypes = Resource.GENBANK_FILEEXTENSION;

    // Parser side.
    ISequenceParser genBankParser = new GenBankParser();
    Assert.AreEqual(genBankParser.Name, expectedName);
    Assert.AreEqual(genBankParser.Description, Resource.GENBANKPARSER_DESCRIPTION);
    Assert.AreEqual(genBankParser.SupportedFileTypes, expectedFileTypes);

    // Formatter side.
    ISequenceFormatter genBankFormatter = new GenBankFormatter();
    Assert.AreEqual(genBankFormatter.Name, expectedName);
    Assert.AreEqual(genBankFormatter.Description, Resource.GENBANKFORMATTER_DESCRIPTION);
    Assert.AreEqual(genBankFormatter.SupportedFileTypes, expectedFileTypes);
}
/// <summary>
/// Validate GenBank features.
/// Parses the source file, writes one of its sequences (with the expected residue
/// string substituted) to a temporary GenBank file, parses that file back and
/// checks the feature counts and feature keys against the XML test data.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="methodName">Name of the method</param>
private void ValidateGenBankFeatures(string nodeName, string methodName)
{
    // Get Values from XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string alphabetName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);
    string mRNAFeatureCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.mRNACount);
    string exonFeatureCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExonCount);
    string intronFeatureCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.IntronCount);
    string cdsFeatureCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CDSCount);
    string allFeaturesCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GenBankFeaturesCount);
    string expectedCDSKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CDSKey);
    string expectedIntronKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.IntronKey);
    string expectedExonKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExonKey);
    string mRNAKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.mRNAKey);
    string sourceKeyName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SourceKey);
    string proteinKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ProteinKeyName);

    // Materialize the parse result once; it is an IEnumerable that may be lazily
    // evaluated, and both the count and an element are needed.
    ISequenceParser parserObj = new GenBankParser();
    List<ISequence> sequenceList = parserObj.Parse(filePath).ToList();

    // A single-sequence file contributes its only entry; otherwise the second
    // entry is used.
    ISequence sourceSequence = sequenceList.Count == 1 ? sequenceList[0] : sequenceList[1];

    // GetTempFileName creates the file on disk, so everything from here on runs
    // inside try/finally: the file is removed even when an assertion fails.
    string tempFileName = Path.GetTempFileName();
    try
    {
        string expectedUpdatedSequence =
            expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
        var orgSeq = new Sequence(Utility.GetAlphabet(alphabetName), expectedUpdatedSequence)
        {
            ID = sourceSequence.ID
        };
        orgSeq.Metadata.Add(Constants.GenBank, sourceSequence.Metadata[Constants.GenBank]);

        ISequenceFormatter formatterObj = new GenBankFormatter();
        formatterObj.Format(orgSeq, tempFileName);

        // parse a temporary file.
        var tempParserObj = new GenBankParser();
        ISequence sequence = tempParserObj.Parse(tempFileName).ElementAt(0);
        var metadata = (GenBankMetadata) sequence.Metadata[Constants.GenBank];

        // Validate formatted temporary file GenBank Features.
        Assert.AreEqual(metadata.Features.All.Count, Convert.ToInt32(allFeaturesCount, null));
        Assert.AreEqual(metadata.Features.CodingSequences.Count, Convert.ToInt32(cdsFeatureCount, null));
        Assert.AreEqual(metadata.Features.Exons.Count, Convert.ToInt32(exonFeatureCount, null));
        Assert.AreEqual(metadata.Features.Introns.Count, Convert.ToInt32(intronFeatureCount, null));
        Assert.AreEqual(metadata.Features.MessengerRNAs.Count, Convert.ToInt32(mRNAFeatureCount, null));
        Assert.AreEqual(metadata.Features.Attenuators.Count, 0);
        Assert.AreEqual(metadata.Features.CAATSignals.Count, 0);
        Assert.AreEqual(metadata.Features.DisplacementLoops.Count, 0);
        Assert.AreEqual(metadata.Features.Enhancers.Count, 0);
        Assert.AreEqual(metadata.Features.Genes.Count, 0);

        IList<FeatureItem> featureList = metadata.Features.All;
        bool isNucleotide =
            (0 == string.Compare(methodName, "DNA", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase))
            || (0 == string.Compare(methodName, "RNA", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase));

        if (isNucleotide)
        {
            // Feature keys are checked at the fixed positions 0, 1, 3, 5 and 6.
            Assert.AreEqual(featureList[0].Key.ToString(null), sourceKeyName);
            Assert.AreEqual(featureList[1].Key.ToString(null), mRNAKey);
            Assert.AreEqual(featureList[3].Key.ToString(null), expectedCDSKey);
            Assert.AreEqual(featureList[5].Key.ToString(null), expectedExonKey);
            Assert.AreEqual(featureList[6].Key.ToString(null), expectedIntronKey);

            ApplicationLog.WriteLine(
                "GenBank Features BVT: Successfully validated the GenBank Features");
            ApplicationLog.WriteLine(string.Format(null,
                "GenBank Features BVT: Successfully validated the CDS feature '{0}'",
                featureList[3].Key.ToString(null)));
            ApplicationLog.WriteLine(string.Format(null,
                "GenBank Features BVT: Successfully validated the Exon feature '{0}'",
                featureList[5].Key.ToString(null)));
        }
        else
        {
            // Any other method name: source, protein and CDS keys at positions 0-2.
            Assert.AreEqual(featureList[0].Key.ToString(null), sourceKeyName);
            Assert.AreEqual(featureList[1].Key.ToString(null), proteinKey);
            Assert.AreEqual(featureList[2].Key.ToString(null), expectedCDSKey);

            ApplicationLog.WriteLine(
                "GenBank Features BVT: Successfully validated the GenBank Features");
            ApplicationLog.WriteLine(string.Format(null,
                "GenBank Features BVT: Successfully validated the CDS feature '{0}'",
                featureList[2].Key.ToString(null)));
            ApplicationLog.WriteLine(string.Format(null,
                "GenBank Features BVT: Successfully validated the Source feature '{0}'",
                featureList[0].Key.ToString(null)));
        }
    }
    finally
    {
        File.Delete(tempFileName);
    }
}
/// <summary>
/// Parses a GenBank file whose PRIMARY header contains a table with its own header
/// row (REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP). The test makes no
/// assertions on the result; it passes as long as reading every record completes
/// without an exception.
/// </summary>
public void TestParsingREFSEQPrimaryHeader()
{
    var parser = new GenBankParser();
    var parsedRecords = parser.Parse(_genBankFile_WithREFSEQPrimaryData);

    // ToList walks the whole result, so every record in the file gets read.
    parsedRecords.ToList();
}
/// <summary>
/// Validate addition of GenBank features.
/// Round-trips the first sequence of the source file through a temporary GenBank
/// file, replaces the parsed feature set with an empty one, adds two features
/// built from the XML test data and checks their keys and location strings.
/// </summary>
/// <param name="nodeName">xml node name.</param>
private void ValidateAdditionGenBankFeatures(string nodeName)
{
    // Get Values from XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string alphabetName = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    string expectedSequence = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode);
    string addFirstKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstKey);
    string addSecondKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondKey);
    string addFirstLocation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstLocation);
    string addSecondLocation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondLocation);
    string addFirstQualifier = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FirstQualifier);
    string addSecondQualifier = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SecondQualifier);

    // Fetch the first source sequence once; the parse result is an IEnumerable
    // that may be lazily evaluated, and both its ID and its metadata are needed.
    ISequenceParser parser1 = new GenBankParser();
    ISequence sourceSequence = parser1.Parse(filePath).ElementAt(0);
    var localBuilderObj = new LocationBuilder();

    // GetTempFileName creates the file on disk, so the remaining work runs inside
    // try/finally: the file is removed even when an assertion fails.
    string tempFileName = Path.GetTempFileName();
    try
    {
        string expectedUpdatedSequence =
            expectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
        var orgSeq = new Sequence(Utility.GetAlphabet(alphabetName), expectedUpdatedSequence);
        orgSeq.ID = sourceSequence.ID;
        orgSeq.Metadata.Add(Constants.GenBank, sourceSequence.Metadata[Constants.GenBank]);

        ISequenceFormatter formatterObj = new GenBankFormatter();
        formatterObj.Format(orgSeq, tempFileName);

        // parse GenBank file.
        var parserObj = new GenBankParser();
        ISequence seq = parserObj.Parse(tempFileName).ElementAt(0);
        var metadata = (GenBankMetadata) seq.Metadata[Constants.GenBank];

        // Start from an empty feature set, then add two new features. Each
        // qualifier list deliberately holds the same value twice, as before.
        metadata.Features = new SequenceFeatures();

        var feature = new FeatureItem(addFirstKey, addFirstLocation);
        var qualifierValues = new List<string> { addFirstQualifier, addFirstQualifier };
        feature.Qualifiers.Add(addFirstQualifier, qualifierValues);
        metadata.Features.All.Add(feature);

        feature = new FeatureItem(addSecondKey, addSecondLocation);
        qualifierValues = new List<string> { addSecondQualifier, addSecondQualifier };
        feature.Qualifiers.Add(addSecondQualifier, qualifierValues);
        metadata.Features.All.Add(feature);

        // Validate added GenBank features.
        Assert.AreEqual(metadata.Features.All[0].Key.ToString(null), addFirstKey);
        Assert.AreEqual(
            localBuilderObj.GetLocationString(metadata.Features.All[0].Location),
            addFirstLocation);
        Assert.AreEqual(metadata.Features.All[1].Key.ToString(null), addSecondKey);
        Assert.AreEqual(
            localBuilderObj.GetLocationString(metadata.Features.All[1].Location),
            addSecondLocation);

        parserObj.Close();
    }
    finally
    {
        File.Delete(tempFileName);
    }
}
/// <summary>
/// Validate GenBank standard features key.
/// Compares feature counts of the first parsed sequence and the names exposed by
/// StandardFeatureKeys with the expected values from the XML test data. For the
/// "DNA" and "RNA" method names the full set is checked; for any other name only
/// the coding-sequence count and key are checked.
/// </summary>
/// <param name="nodeName">xml node name.</param>
/// <param name="methodName">Name of the method</param>
private void ValidateStandardFeaturesKey(string nodeName, string methodName)
{
    // Get Values from XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string expectedCondingSeqCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CDSCount);
    string exonFeatureCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExonCount);
    string expectedtRNA = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.tRNACount);
    string expectedGeneCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GeneCount);
    string miscFeatureCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MiscFeatureCount);
    string expectedCDSKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.CDSKey);
    string expectedIntronKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.IntronKey);
    string mRNAKey = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.mRNAKey);
    string allFeaturesCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StandardFeaturesCount);

    // Parse a file.
    ISequenceParser parserObj = new GenBankParser();
    IEnumerable<ISequence> seq = parserObj.Parse(filePath);

    // An 'as' cast yields null on a type mismatch; fail with a clear message
    // rather than a NullReferenceException on the first property access.
    var metadata = seq.ElementAt(0).Metadata[Constants.GenBank] as GenBankMetadata;
    Assert.IsNotNull(metadata, "GenBank metadata was not found on the parsed sequence.");

    var features = metadata.Features;
    bool isNucleotide =
        (0 == string.Compare(methodName, "DNA", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase))
        || (0 == string.Compare(methodName, "RNA", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase));

    if (isNucleotide)
    {
        // Validate standard features keys.
        Assert.AreEqual(features.CodingSequences.Count.ToString((IFormatProvider) null), expectedCondingSeqCount);
        Assert.AreEqual(features.Exons.Count.ToString((IFormatProvider) null), exonFeatureCount);
        Assert.AreEqual(features.TransferRNAs.Count.ToString((IFormatProvider) null), expectedtRNA);
        Assert.AreEqual(features.Genes.Count.ToString((IFormatProvider) null), expectedGeneCount);
        Assert.AreEqual(features.MiscFeatures.Count.ToString((IFormatProvider) null), miscFeatureCount);
        Assert.AreEqual(StandardFeatureKeys.CodingSequence.ToString(null), expectedCDSKey);
        Assert.AreEqual(StandardFeatureKeys.Intron.ToString(null), expectedIntronKey);
        Assert.AreEqual(StandardFeatureKeys.MessengerRna.ToString(null), mRNAKey);
        Assert.AreEqual(StandardFeatureKeys.All.Count.ToString((IFormatProvider) null), allFeaturesCount);
    }
    else
    {
        Assert.AreEqual(features.CodingSequences.Count.ToString((IFormatProvider) null), expectedCondingSeqCount);
        Assert.AreEqual(StandardFeatureKeys.CodingSequence.ToString(null), expectedCDSKey);
    }
}
/// <summary>
/// Validates GenBank Parser for specific test cases.
/// Parses the file at FilePath with a new GenBankParser and compares the first
/// sequence (alphabet, ID, locus information, version data and residues) with the
/// expected values held in the static members this method reads.
/// </summary>
private static void ValidateParserSpecialTestCases()
{
    ISequenceParser parserObj = new GenBankParser();

    Assert.IsTrue(File.Exists(FilePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format("GenBank Parser : File Exists in the Path '{0}'.", FilePath));

    ISequence firstSequence = parserObj.Parse(FilePath).ElementAt(0);

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
    Assert.AreEqual(SeqId, firstSequence.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser : Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // test the metadata that is tricky to parse, and will not be tested implicitly by
    // testing the formatting
    GenBankMetadata metadata = (GenBankMetadata)firstSequence.Metadata["GenBank"];

    if (metadata.Locus.Strand != SequenceStrandType.None)
    {
        Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());

        // NOTE(review): the topology comparison is guarded by Strand, not by
        // StrandTopology - possibly a copy/paste slip; confirm the intent.
        Assert.AreEqual(
            StrandTopology.ToUpper(CultureInfo.CurrentCulture),
            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
    }

    if (metadata.Locus.DivisionCode != SequenceDivisionCode.None)
    {
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
    }

    Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);

    // Version and GI number are only checked when the alphabet is not RNA.
    bool isRna = 0 == string.Compare(AlphabetName, "rna", CultureInfo.CurrentCulture, CompareOptions.IgnoreCase);
    if (isRna)
    {
        ApplicationLog.WriteLine(
            "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date Properties");
    }
    else
    {
        Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
        Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Parser : Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");
    }

    // Replace all the empty spaces, paragraphs and new line for validation
    string updatedExpSequence = ExpectedSequence
        .Replace("\r", "")
        .Replace("\n", "")
        .Replace(" ", "")
        .ToUpper(CultureInfo.CurrentCulture);

    string actualResidues = new string(firstSequence.Select(a => (char)a).ToArray());
    string updatedActualSequence = actualResidues
        .Replace("\r", "")
        .Replace("\n", "")
        .Replace(" ", "")
        .ToUpper(CultureInfo.CurrentCulture);

    Assert.AreEqual(updatedExpSequence, updatedActualSequence);
    ApplicationLog.WriteLine(
        "GenBank Parser : Successfully validated the Sequence");
}