/// <summary>
/// Writes a sequence built from the configured GenBank file to a temporary file with
/// GenBankFormatter.Format(sequence, path), parses that file back and validates the
/// alphabet, ID, locus/version metadata and sequence text.
/// </summary>
public void GenBankFormatterValidateWriteWithFilePath()
{
    InitializeXmlVariables();

    // Parse the source file once; the result is enumerated a single time so a
    // deferred parse is not repeated for every property that is read from it.
    ISequenceParser parserObj = new GenBankParser();
    ISequence parsedSource = parserObj.Parse(FilePath).ElementAt(0);

    string expectedUpdatedSequence =
        ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");

    var orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence)
    {
        ID = parsedSource.ID
    };
    orgSeq.Metadata.Add("GenBank", parsedSource.Metadata["GenBank"]);

    string tempFileName = Path.GetTempFileName();
    try
    {
        ISequenceFormatter formatter = new GenBankFormatter();
        formatter.Format(orgSeq, tempFileName);

        // parse the file that was just written
        ISequenceParser parserObjFromFile = new GenBankParser();
        ISequence seq = parserObjFromFile.Parse(tempFileName).ElementAt(0);

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
        Assert.AreEqual(SeqId, seq.ID);
        ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // test the metadata that is tricky to parse, and will not be tested implicitly by
        // testing the formatting
        // NOTE(review): this reads the metadata of the sequence that was written (orgSeq),
        // not of the sequence parsed back from the temp file - confirm that is intended.
        var metadata = (GenBankMetadata)orgSeq.Metadata["GenBank"];
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        Assert.AreEqual(
            StrandTopology.ToUpper(CultureInfo.CurrentCulture),
            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
        Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
        Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // test the sequence string
        Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
        ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
    }
    finally
    {
        // Remove the temp file even when an assertion or the parser throws.
        File.Delete(tempFileName);
    }
}
/// <summary>
/// Parses NC_001284.gbk, checks the feature counts of its GenBank metadata, then
/// serializes the metadata with BinaryFormatter to "GenbankMetadata.data", reads it
/// back and checks that the deserialized copy reports the same feature counts.
/// </summary>
public void TestGenBankFeaturesWithBinaryFormatter()
{
    // NOTE(review): BinaryFormatter is obsolete in current .NET and must not be used on
    // untrusted data; here it only round-trips data produced by this test.
    // The stream is disposed by the using block. Failures propagate unchanged (the
    // previous catch-all replaced every error with a message-less Assert.Fail()).
    using (Stream stream = File.Open("GenbankMetadata.data", FileMode.Create))
    {
        BinaryFormatter formatter = new BinaryFormatter();
        ISequenceParser parser = new GenBankParser();
        ISequence seq = parser.ParseOne(@"testdata\GenBank\NC_001284.gbk");

        GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;
        Assert.IsNotNull(metadata);

        Assert.AreEqual(metadata.Features.All.Count, 743);
        Assert.AreEqual(metadata.Features.CodingSequences.Count, 117);
        Assert.AreEqual(metadata.Features.Exons.Count, 32);
        Assert.AreEqual(metadata.Features.Introns.Count, 22);
        Assert.AreEqual(metadata.Features.Genes.Count, 60);
        Assert.AreEqual(metadata.Features.MiscFeatures.Count, 455);
        Assert.AreEqual(metadata.Features.Promoters.Count, 17);
        Assert.AreEqual(metadata.Features.TransferRNAs.Count, 21);
        Assert.AreEqual(
            metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count,
            117);
        Assert.AreEqual(
            metadata.Features.CodingSequences[0].Translation.Trim('"'),
            metadata.Features.CodingSequences[0].GetTranslation().ToString());
        Assert.AreEqual(metadata.GetFeatures(11918, 12241).Count, 2);

        // Round-trip through the stream: write, rewind, read back.
        formatter.Serialize(stream, metadata);
        stream.Seek(0, SeekOrigin.Begin);
        GenBankMetadata deserializedMetadata = (GenBankMetadata)formatter.Deserialize(stream);

        Assert.AreNotSame(metadata, deserializedMetadata);
        Assert.AreEqual(deserializedMetadata.Features.All.Count, 743);
        Assert.AreEqual(deserializedMetadata.Features.CodingSequences.Count, 117);
        Assert.AreEqual(deserializedMetadata.Features.Exons.Count, 32);
        Assert.AreEqual(deserializedMetadata.Features.Introns.Count, 22);
        Assert.AreEqual(deserializedMetadata.Features.Genes.Count, 60);
        Assert.AreEqual(deserializedMetadata.Features.MiscFeatures.Count, 455);
        Assert.AreEqual(deserializedMetadata.Features.Promoters.Count, 17);
        Assert.AreEqual(deserializedMetadata.Features.TransferRNAs.Count, 21);
        Assert.AreEqual(
            deserializedMetadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count,
            117);
        // NOTE(review): the expected translation is computed from the original metadata,
        // not from the deserialized copy - confirm that is intended.
        Assert.AreEqual(
            deserializedMetadata.Features.CodingSequences[0].Translation.Trim('"'),
            metadata.Features.CodingSequences[0].GetTranslation().ToString());
        Assert.AreEqual(deserializedMetadata.GetFeatures(11918, 12241).Count, 2);
    }
}
/// <summary>
/// Parses the GenBank file at FilePath with Parse(fileName) and validates the first
/// sequence's basic properties, locus/version metadata and sequence text.
/// </summary>
public void GenBankParserValidateParseFileName()
{
    // parse
    ISequenceParser genBankParser = new GenBankParser();
    IList<ISequence> parsedSequences = genBankParser.Parse(FilePath);
    ISequence firstSequence = parsedSequences[0];

    // test the non-metadata properties; the read-only check only runs when the
    // configured flag is exactly "true" (case-sensitive, current culture)
    bool readOnlyExpected =
        string.Compare(IsSequenceReadOnly, "true", false, CultureInfo.CurrentCulture) == 0;
    if (readOnlyExpected)
    {
        Assert.IsTrue(firstSequence.IsReadOnly);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the ReadOnly Property");
    }

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
    Assert.AreEqual(Utility.GetMoleculeType(MolType), firstSequence.MoleculeType);
    Assert.AreEqual(SeqId, firstSequence.DisplayID);
    Assert.AreEqual(SeqId, firstSequence.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // test the metadata that is tricky to parse, and will not be tested implicitly by
    // testing the formatting
    GenBankMetadata genBankMetadata = (GenBankMetadata)firstSequence.Metadata["GenBank"];
    if (genBankMetadata.Locus.Strand != SequenceStrandType.None)
    {
        Assert.AreEqual(StrandType, genBankMetadata.Locus.Strand.ToString());
    }

    string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
    string actualTopology =
        genBankMetadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedTopology, actualTopology);
    Assert.AreEqual(Div, genBankMetadata.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankMetadata.Locus.Date);
    Assert.AreEqual(Version, genBankMetadata.Version.Version.ToString());
    Assert.AreEqual(PrimaryId, genBankMetadata.Version.GINumber);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // test the sequence string
    Assert.AreEqual(ExpectedSequence, firstSequence.ToString());
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Sequence");

    string consoleMessage = string.Format(null,
        "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
        ExpectedSequence);
    Console.WriteLine(consoleMessage);
}
/// <summary>
/// Picks a sequence parser for the given file and returns it already opened on that file.
/// Built-in formats are checked first (zipped FASTA, zipped FASTQ, FASTA, FASTQ, BAM,
/// GenBank); otherwise the registered parsers in All are searched by file extension.
/// </summary>
/// <param name="fileName">File name for which the parser is required.</param>
/// <returns>The opened parser, or null when the name is null/empty or no parser matches.</returns>
/// <exception cref="NotImplementedException">The file is recognised as BAM by Helper.IsBAM.</exception>
public static ISequenceParser FindParserByFileName(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (Helper.IsZippedFasta(fileName))
    {
        return new FastAZippedParser(fileName);
    }

    if (Helper.IsZippedFastQ(fileName))
    {
        return new FastQZippedParser(fileName);
    }

    if (IsFasta(fileName))
    {
        return new FastAParser(fileName);
    }

    if (IsFastQ(fileName))
    {
        return new FastQParser(fileName);
    }

    if (Helper.IsBAM(fileName))
    {
        // BAM files are recognised, but no sequence parser is created for them here.
        throw new NotImplementedException();
    }

    if (IsGenBank(fileName))
    {
        return new GenBankParser(fileName);
    }

    // Fall back to the known parsers so custom parsers added through add-ins are found.
    string extension = Path.GetExtension(fileName);
    if (string.IsNullOrEmpty(extension))
    {
        return null;
    }

    ISequenceParser registeredParser =
        All.FirstOrDefault(candidate => candidate.SupportedFileTypes.Contains(extension));

    // The built-in parsers above receive the file name in their constructor; a parser
    // found by extension is opened explicitly so the caller gets the same result.
    if (registeredParser != null)
    {
        registeredParser.Open(fileName);
    }

    return registeredParser;
}
/// <summary>
/// Checks that the GenBank parser and formatter report the name, description and
/// supported file types stored in the Resource class.
/// </summary>
public void GenBankProperties()
{
    // Parser properties.
    ISequenceParser genBankParser = new GenBankParser();
    Assert.AreEqual(genBankParser.Name, Resource.GENBANK_NAME);
    Assert.AreEqual(genBankParser.Description, Resource.GENBANKPARSER_DESCRIPTION);
    Assert.AreEqual(genBankParser.SupportedFileTypes, Resource.GENBANK_FILEEXTENSION);

    // Formatter properties.
    ISequenceFormatter genBankFormatter = new GenBankFormatter();
    Assert.AreEqual(genBankFormatter.Name, Resource.GENBANK_NAME);
    Assert.AreEqual(genBankFormatter.Description, Resource.GENBANKFORMATTER_DESCRIPTION);
    Assert.AreEqual(genBankFormatter.SupportedFileTypes, Resource.GENBANK_FILEEXTENSION);
}
/// <summary>
/// Parses the GenBank file at FilePath through a StreamReader and validates the first
/// sequence's alphabet, ID, locus/version metadata and sequence text.
/// </summary>
public void GenBankParserValidateParseFileNameWithStream()
{
    InitializeXmlVariables();

    // Parse the Stream.
    using (ISequenceParser streamParser = new GenBankParser())
    {
        List<ISequence> parsedSequences;
        using (StreamReader fileReader = new StreamReader(FilePath))
        {
            // Materialize the result while the reader is still open.
            parsedSequences = streamParser.Parse(fileReader).ToList();
        }

        ISequence firstSequence = parsedSequences[0];

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
        Assert.AreEqual(SeqId, firstSequence.ID);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // test the metadata that is tricky to parse, and will not be tested implicitly by
        // testing the formatting
        GenBankMetadata genBankMetadata = (GenBankMetadata)firstSequence.Metadata["GenBank"];
        if (genBankMetadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, genBankMetadata.Locus.Strand.ToString());
        }

        string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
        string actualTopology =
            genBankMetadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
        Assert.AreEqual(expectedTopology, actualTopology);
        Assert.AreEqual(Div, genBankMetadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankMetadata.Locus.Date);
        Assert.AreEqual(Version, genBankMetadata.Version.Version.ToString((IFormatProvider)null));
        Assert.AreEqual(PrimaryId, genBankMetadata.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // test the sequence string
        string actualSequence = new string(firstSequence.Select(a => (char)a).ToArray());
        Assert.AreEqual(ExpectedSequence, actualSequence);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Sequence");

        string consoleMessage = string.Format((IFormatProvider)null,
            "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
            ExpectedSequence);
        Console.WriteLine(consoleMessage);
    }
}
/// <summary>
/// Parses a single GenBank record with ParseOne, checks its basic properties, locus and
/// version metadata, reference/feature counts and sequence text, then formats it back
/// and compares the output (spaces removed) with the expected text.
/// </summary>
public void TestGenBankWhenParsingOne()
{
    // parse
    ISequenceParser parser = new GenBankParser();
    ISequence seq = parser.ParseOne(_singleProteinSeqGenBankFilename);

    // test the non-metadata properties
    Assert.IsTrue(seq.IsReadOnly);
    Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
    Assert.AreEqual(MoleculeType.DNA, seq.MoleculeType);
    Assert.AreEqual("SCU49845", seq.DisplayID);
    Assert.AreEqual("SCU49845", seq.ID);

    // test the metadata that is tricky to parse, and will not be tested implicitly by
    // testing the formatting
    GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
    Assert.AreEqual(metadata.Locus.Strand, SequenceStrandType.None);

    // Culture-invariant lower-casing and date parsing: the expected values are fixed
    // English literals, so they must not depend on the machine's current culture.
    Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLowerInvariant());
    Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
    Assert.AreEqual(
        DateTime.Parse("21-JUN-1999", System.Globalization.CultureInfo.InvariantCulture),
        metadata.Locus.Date);
    Assert.AreEqual("1", metadata.Version.Version);
    Assert.AreEqual("1293613", metadata.Version.GINumber);

    // test that we're correctly putting all types of metadata in the right places
    Assert.AreEqual(1, seq.Metadata.Count);
    IList<CitationReference> referenceList = metadata.References;
    Assert.AreEqual(3, referenceList.Count);
    IList<FeatureItem> featureList = metadata.Features.All;
    Assert.AreEqual(6, featureList.Count);
    Assert.AreEqual(4, featureList[0].Qualifiers.Count);
    Assert.AreEqual(5, featureList[1].Qualifiers.Count);
    Assert.AreEqual(1, featureList[2].Qualifiers.Count);

    // test the sequence string
    // NOTE(review): the literal is reproduced exactly as it appears in the file,
    // including where it continues onto a new physical line. A verbatim string keeps
    // those line breaks - confirm they are intended.
    string expected =
@"GATCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCCGACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCTGCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGAACCGCCAATAGACAACATATGTAACATATTTAGGATATACCTCGAAAATAATAAACCGCCACACTGTCATTATTATAATTAGAAACAGAACGCAAAAATTATCCACTATATAATTCAAAGACGCGAAAAAAAAAGAACAACGCGTCATAGAACTTTTGGCAATTCGCGTCACAAATAAATTTTGGCAACTTATGTTTCCTCTTCGAGCAGTACTCGAGCCCTGTCTCAAGAATGTAATAATACCCATCGTAGGTATGGTTAAAGATAGCATCTCCACAACCTCAAAGCTCCTTGCCGAGAGTCGCCCTCCTTTGTCGAGTAATTTTCACTTTTCATATGAGAACTTATTTTCTTATTCTTTACTCTCACATCCTGTAGTGATTGACACTGCAACAGCCACCATCACTAGAAGAACAGAACAATTACTTAATAGAAAAATTATATCTTCCTCGAAACGATTTCCTGCTTCCAACATCTACGTATATCAAGAAGCATTCACTTACCATGACACAGCTTCAGATTTCATTATTGCTGACAGCTACTATATCACTACTCCATCTAGTAGTGGCCACGCCCTATGAGGCATATCCTATCGGAAAACAATACCCCCCAGTGGCAAGAGTCAATGAATCGTTTACATTTCAAATTTCCAATGATACCTATAAATCGTCTGTAGACAAGACAGCTCAAATAACATACAATTGCTTCGACTTACCGAGCTGGCTTTCGTTTGACTCTAGTTCTAGAACGTTCTCAGGTGAACCTTCTTCTGACTTACTATCTGATGCGAACACCACGTTGTATTTCAATGTAATACTCGAGGGTACGGACTCTGCCGACAGCACGTCTTTGAACAATACATACCAATTTGTTGTTACAAACCGTCCATCCATCTCGCTATCGTCAGATTTCAATCTATTGGCGTTGTTAAAAAACTATGGTTATACTAACGGCAAAAACGCTCTGAAACTAGATCCTAATGAAGTCTTCAACGTGACTTTTGACCGTTCAATGTTCACTAACGAAGAATCCATTGTGTCGTATTACGGACGTTCTCAGTTGTATAATGCGCCGTTACCCAATTGGCTGTTCTTCGATTCTGGCGAGTTGAAGTTTACTGGGACGGCACCGGTGATAAACTCGGCGATTGCTCCAGAAACAAGCTACAGTTTTGTCATCATCGCTACAGACATTGAAGGATTTTCTGCCGTTGAGGTAGAATTCGAATTAGTCATCGGGGCTCACCAGTTAACTACCTCTATTCAAAATAGTTTGATAATCAACGTTACTGACACAGGTAACGTTTCATATGACTTACCTCTAAACTATGTTTATCTCGATGACGATCCTATTTCTTCTGATAAATTGGGTTCTATAAACTTATTGGATGCTCCAGACTGGGTGGCATTAGATAATGCTACCATTTCCGGGTCTGTCCCAGATGAATTACTCGGTAAGAACTCCAATCCTGCCAATTTTTCTGTGTCCATTTATGATACTTATGGTGATGTGATTTATTTCAACTTCGAAGTTGTCTCCACAACGGATTTGTTTGCCATTAGTTCTCTTCCCAATATTAACGCTACAAGGGGTGAATGGTTCTCCTACTATTTTTTGCCTTCTCAGTTTACAGACTACGTGAATACAAACGTTTCATTAGAGTTTACTAATTCAAGCCAAGACCATGACTGGGTGAAATTCCAATCATCTAATTTAACATTAGCTGGAGAAGTGCCCAAGAATTTCGACAAGCTTTCATTAGGTTTGAAAGCGAACCAAGGTTCACAATCTCAAGAGCTATATTTTAACATCATTG
GCATGGATTCAAAGATAACTCACTCAAACCACAGTGCGAATGCAACGTCCACAAGAAGTTCTCACCACTCCACCTCAACAAGTTCTTACACATCTTCTACTTACACTGCAAAAATTTCTTCTACCTCCGCTGCTGCTACTTCTTCTGCTCCAGCAGCGCTGCCAGCAGCCAATAAAACTTCATCTCACAATAAAAAAGCAGTAGCAATTGCGTGCGGTGTTGCTATCCCATTAGGCGTTATCCTAGTAGCTCTCATTTGCTTCCTAATATTCTGGAGACGCAGAAGGGAAAATCCAGACGATGAAAACTTACCGCATGCTATTAGTGGACCTGATTTGAATAATCCTGCAAATAAACCAAATCAAGAAAACGCTACACCTTTGAACAACCCCTTTGATGATGATGCTTCCTCGTACGATGATACTTCAATAGCAAGAAGATTGGCTGCTTTGAACACTTTGAAATTGGATAACCACTCTGCCACTGAATCTGATATTTCCAGCGTGGATGAAAAGAGAGATTCTCTATCAGGTATGAATACATACAATGATCAGTTCCAATCCCAAAGTAAAGAAGAATTATTAGCAAAACCCCCAGTACAGCCTCCAGAGAGCCCGTTCTTTGACCCACAGAATAGGTCTTCTTCTGTGTATATGGATAGTGAACCAGCAGTAAATAAATCCTGGCGATATACTGGCAACCTGTCACCAGTCTCTGATATTGTCAGAGACAGTTACGGATCACAAAAAACTGTTGATACAGAAAAACTTTTCGATTTAGAAGCACCAGAGAAGGAAAAACGTACGTCAAGGGATGTCACTATGTCTTCACTGGACCCTTGGAACAGCAATATTAGCCCTTCTCCCGTAAGAAAATCAGTAACACCATCACCATATAACGTAACGAAGCATCGTAACCGCCACTTACAAAATATTCAAGACTCTCAAAGCGGTAAAAACGGAATCACTCCCACAACAATGTCAACTTCATCTTCTGACGATTTTGTTCCGGTTAAAGATGGTGAAAATTTTTGCTGGGTCCATAGCATGGAACCAGACAGAAGACCAAGTAAGAAAAGGTTAGTAGATTTTTCAAATAAGAGTAATGTCAATGTTGGTCAAGTTAAGGACATTCACGGACGCATCCCAGAAATGCTGTGATTATACGCAACGATATTTTGCTTAATTTTATTTTCCTGTTTTATTTTTTATTAGTGGTTTACAGATACCCTATATTTTATTTAGTTTTTATACTTAGAGACATTTAATTTTAATTCCATTCTTCAAATTTCATTTTTGCACTTAAAACAAAGATCCAAAAATGCTCTCGCCCTCTTCATATTGAGAATACACTCCATTCAAAATTTTGTCGTCACCGCTGATTAATTTTTCACTAAACTGATGAATAATCAAAGGCCCCACGTCAGAACCGACTAAAGAAGTGAGTTTTATTTTAGGAGGTTGAAAACCATTATTGTCTGGTAAATTTTCATCTTCTTGACATTTAACCCAGTTTGAATCCCTTTCAATTTCTGCTTTTTCCTCCAAACTATCGACCCTCCTGTTTCTGTCCAACTTATGTCCTAGTTCCAATTCGATCGCATTAATAACTGCTTCAAATGTTATTGTGTCATCGTTGACTTTAGGTAATTTCTCCAAATGCATAATCAAACTATTTAAGGAAGATCGGAATTCGTCGAACACTTCAGTTTCCGTAATGATCTGATCGTCTTTATCCACATGTTGTAATTCACTAAAATCTAAAACGTATTTTTCAATGCATAAATCGTTCTTTTTATTAATAATGCAGATGGAAAATCTGTAAACGTGCGTTAATTTAGAAAGAACATCCAGTATAAGTTCTTCTATATAGTCAATTAAAGCAGGATGCCTATTAATGGGAACGAACTGCGGCAAGTTGAATGACTGGTAAGTAGTGTAGTCGAATGACTGAGGTGGGTATACATTTCTATAAAATAAAATCAAATTAATGTAGCATTT
TAAGTATACCCTCAGCCACTTCTCTACCCATCTATTCATAAAGCTGACGCAACGATTACTATTTTTTTTTTCTTCTTGGATCTCAGTCGTCGCAAAAACGTATACCTTCTTTTTCCGACCTTTTTTTTAGCTTTCTGGAAAAGTTTATATTAGTTAAACAGGGTCTAGTCTTAGTGTGAAAGCTAGTGGTTTCGATTGACTGATATTAAGAAAGTGGAAATTAAATTAGTAGTGTAGACGTATATGCATATGTATTTCTCGCCTGTTTATGTTTCTACGTACTTTTGATTTATAGCAAGGGGAAAAGAAATACATACTATTTTTTGGTAAAGGTGAAAGCATAATGTAAAAGCTAGAATAAAATGGACGAAATAAAGAGAGGCTTAGTTCATCTTTTTTCCAAAAAGCACCCAATGATAATAACTAAAATGAAAAGGATTTGCCATCTGTCAGCAACATCAGTTGTGTGAGCAATAATAAAATCATCACCTCCGTTGCCTTTAGCGCGTTTGTCGTTTGTATCTTCCGTAATTTTAGTCTTATCAATGGGAATCATAAATTTTCCAATGAATTAGCAATTTCGTCCAATTCTTTTTGAGCTTCTTCATATTTGCTTTGGAATTCTTCGCACTTCTTTTCCCATTCATCTCTTTCTTCTTCCAAAGCAACGATCCTTCTACCCATTTGCTCAGAGTTCAAATCGGCCTCTTTCAGTTTATCCATTGCTTCCTTCAGTTTGGCTTCACTGTCTTCTAGCTGTTGTTCTAGATCCTGGTTTTTCTTGGTGTAGTTCTCATTATTAGATCTCAAGTTATTGGAGTCTTCAGCCAATTGCTTTGTATCAGACAATTGACTCTCTAACTTCTCCACTTCACTGTCGAGTTGCTCGTTTTTAGCGGACAAAGATTTAATCTCGTTTTCTTTTTCAGTGTTAGATTGCTCTAATTCTTTGAGCTGTTCTCTCAGCTCCTCATATTTTTCTTGCCATGACTCAGATTCTAATTTTAAGCTATTCAATTTCTCTTTGATC";
    Assert.AreEqual(expected, seq.ToString());

    // format
    ISequenceFormatter formatter = new GenBankFormatter();
    string actual = formatter.FormatString(seq);

    // test the formatting
    Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
}
/// <summary>
/// Parses the configured GenBank file, checks the first sequence's text and length
/// against the expected value from the XML configuration, then wraps it in a
/// BasicDerivedSequence and checks that the derived sequence renders the same text.
/// </summary>
public void ValidateBasicDerivedSequenceWithGenBankFormat()
{
    // Expected values come from the XML configuration.
    string expectedSequence = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.ExpectedSequenceNode);
    string geneBankFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.FilePathNode);

    Assert.IsTrue(File.Exists(geneBankFilePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(
        "Sequence BVT: Sequence BVT: The File exist in the Path " + geneBankFilePath);

    // Parse the GenBank file and take the first entry as a Sequence.
    ISequenceParser genBankParser = new GenBankParser();
    IList<ISequence> parsedSequences = genBankParser.Parse(geneBankFilePath);
    Assert.IsNotNull(parsedSequences);

    Sequence geneBankSeq = (Sequence)parsedSequences[0];
    Assert.IsNotNull(geneBankSeq);
    Assert.AreEqual(expectedSequence, geneBankSeq.ToString());
    ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence is as expected.");

    // Copy the sequence items into a byte buffer and compare lengths.
    byte[] encodedItems = new byte[geneBankSeq.Count];
    (geneBankSeq as IList<byte>).CopyTo(encodedItems, 0);
    Assert.AreEqual(expectedSequence.Length, encodedItems.Length);
    ApplicationLog.WriteLine("Sequence BVT: The GenBank Sequence Length is as expected.");

    // Create a derived sequence on top of the parsed GenBank sequence.
    BasicDerivedSequence derivedSequence =
        new BasicDerivedSequence(geneBankSeq, false, false, -1, -1);

    // validate the DerivedSequence with originalSequence.
    Assert.IsNotNull(derivedSequence);
    Assert.AreEqual(expectedSequence, derivedSequence.ToString());
    Assert.AreEqual(geneBankSeq.ToString(), derivedSequence.ToString());
    ApplicationLog.WriteLine("Sequence BVT: The BasicDerived Sequence is as expected.");

    // Logs to Nunit GUI.
    Console.WriteLine(
        "Sequence BVT: Validation of GenBank file Sequence is completed successfully.");
}
/// <summary>
/// Parses the configured GenBank file, makes its first sequence writable, appends one
/// item taken from a small "TCGN" sequence and checks the resulting text and count
/// against the expected values from the XML configuration.
/// </summary>
public void ValidateSequenceInsertWithGenBankFormat()
{
    // Expected values come from the XML configuration.
    string expectedSequenceCount = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.SimpleFastaSequenceCount);
    string geneBankFilePath = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.FilePathNode);
    string expectedSeqAfterAdd = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleGeneBankNodeName, Constants.ExpectedSeqAfterAdd);
    string alphabetName = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.AlphabetNameNode);
    string actualSequence = _utilityObj._xmlUtil.GetTextValue(
        Constants.SimpleDnaAlphabetNode, Constants.ExpectedSingleChar);

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "Sequence BVT: Sequence {0} is expected.", actualSequence, alphabetName));

    Sequence itemSource = new Sequence(Utility.GetAlphabet(alphabetName), "TCGN");

    Assert.IsTrue(File.Exists(geneBankFilePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "Sequence BVT: The File exist in the Path {0}.", geneBankFilePath));

    // Parse the GenBank file and take the first entry as a Sequence.
    ISequenceParser genBankParser = new GenBankParser();
    IList<ISequence> parsedSequences = genBankParser.Parse(geneBankFilePath);
    Sequence target = (Sequence)parsedSequences[0];
    target.IsReadOnly = false;

    string seqBeforeAdding = target.ToString();
    target.Add(itemSource[0]);

    // Validate the sequence after adding the item.
    string seqAfterAdd = target.ToString();
    Assert.AreEqual(seqAfterAdd, expectedSeqAfterAdd);
    Assert.AreNotEqual(seqAfterAdd, seqBeforeAdding);
    Assert.AreEqual(target.Count.ToString((IFormatProvider)null), expectedSequenceCount);

    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "Sequence BVT: Sequence {0} is expected.", seqAfterAdd));
}
/// <summary>
/// Validates feature access on GenBank metadata for two files: the single-protein
/// sample (coding sequences, cross references, range queries) and NC_001284.gbk
/// (feature counts per type and translation of the first coding sequence).
/// </summary>
public void GenBankFeatures()
{
    // Both parsers are opened on a file by their constructor; the using blocks make
    // sure each one is disposed even when an assertion fails.
    using (ISequenceParser parser = new GenBankParser(_singleProteinSeqGenBankFilename))
    {
        ISequence seq = parser.Parse().FirstOrDefault();
        Assert.IsNotNull(seq);

        GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;
        Assert.IsNotNull(metadata);

        List<CodingSequence> CDS = metadata.Features.CodingSequences;
        Assert.AreEqual(CDS.Count, 3);
        Assert.AreEqual(CDS[0].DatabaseCrossReference.Count, 1);
        Assert.AreEqual(CDS[0].GeneSymbol, string.Empty);
        Assert.AreEqual(metadata.Features.GetFeatures("source").Count, 1);
        Assert.IsFalse(CDS[0].Pseudo);
        Assert.AreEqual(metadata.GetFeatures(1, 109).Count, 2);
        Assert.AreEqual(metadata.GetFeatures(1, 10).Count, 2);
        Assert.AreEqual(metadata.GetFeatures(10, 100).Count, 2);
        Assert.AreEqual(metadata.GetFeatures(120, 150).Count, 2);
        Assert.AreEqual(metadata.GetCitationsReferredInFeatures().Count, 0);

        parser.Close();
    }

    using (ISequenceParser parser1 = new GenBankParser(_genBankDataPath + @"\NC_001284.gbk"))
    {
        ISequence seq1 = parser1.Parse().FirstOrDefault();
        Assert.IsNotNull(seq1);

        GenBankMetadata metadata = seq1.Metadata["GenBank"] as GenBankMetadata;
        Assert.IsNotNull(metadata);

        Assert.AreEqual(metadata.Features.All.Count, 743);
        Assert.AreEqual(metadata.Features.CodingSequences.Count, 117);
        Assert.AreEqual(metadata.Features.Exons.Count, 32);
        Assert.AreEqual(metadata.Features.Introns.Count, 22);
        Assert.AreEqual(metadata.Features.Genes.Count, 60);
        Assert.AreEqual(metadata.Features.MiscFeatures.Count, 455);
        Assert.AreEqual(metadata.Features.Promoters.Count, 17);
        Assert.AreEqual(metadata.Features.TransferRNAs.Count, 21);
        Assert.AreEqual(
            metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count,
            117);
        Assert.AreEqual(metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count, 117);

        // Copy the translated sequence item by item into a byte buffer and decode it
        // as ASCII so it can be compared with the Translation qualifier text.
        ISequence seqTemp = metadata.Features.CodingSequences[0].GetTranslation();
        byte[] tempData = new byte[seqTemp.Count];
        for (int i = 0; i < seqTemp.Count; i++)
        {
            tempData[i] = seqTemp[i];
        }

        string sequenceInString = ASCIIEncoding.ASCII.GetString(tempData);
        Assert.AreEqual(
            metadata.Features.CodingSequences[0].Translation.Trim('"'),
            sequenceInString.Trim('"'));
        Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
    }
}
/// <summary>
/// Sets the parser's alphabet to DNA before parsing a DNA GenBank file, then checks the
/// parsed alphabet and compares the formatted output (spaces removed) with the
/// expected text.
/// </summary>
public void TestGenBankWhenUserSetsDnaAlphabet()
{
    // set correct alphabet and parse
    ISequenceParser dnaParser = new GenBankParser();
    dnaParser.Alphabet = Alphabets.DNA;
    ISequence parsedSequence = dnaParser.ParseOne(_singleDnaSeqGenBankFilename);
    Assert.AreEqual(Alphabets.DNA, parsedSequence.Alphabet);

    // format
    ISequenceFormatter genBankFormatter = new GenBankFormatter();
    string formattedText = genBankFormatter.FormatString(parsedSequence);

    // test the formatting, ignoring spaces on both sides
    string expectedWithoutSpaces = _singleDnaSeqGenBankFileExpectedOutput.Replace(" ", "");
    string actualWithoutSpaces = formattedText.Replace(" ", "");
    Assert.AreEqual(expectedWithoutSpaces, actualWithoutSpaces);
}
/// <summary>
/// Negative-test helper: configures a GenBankParser invalidly for the given XML node
/// (null LocationBuilder or RNA alphabet) and expects parsing to throw.
/// </summary>
/// <param name="node">XML node name used to look up the GenBank file path and to pick
/// which invalid setting is applied to the parser.</param>
void InvalidateGenBankParser(string node)
{
    // Initialization of xml strings.
    FilePath = utilityObj.xmlUtil.GetTextValue(node, Constants.FilePathNode);

    bool exceptionThrown = false;
    try
    {
        using (GenBankParser parserObj = new GenBankParser(FilePath))
        {
            if (string.Equals(Constants.SimpleGenBankNodeName, node))
            {
                parserObj.LocationBuilder = null;
            }
            else if (string.Equals(Constants.SimpleGenBankPrimaryNode, node))
            {
                parserObj.Alphabet = Alphabets.RNA;
            }

            // Enumerate the result so the parse actually runs; Parse() may be lazily
            // evaluated, in which case calling it alone would never throw.
            foreach (var parsed in parserObj.Parse())
            {
            }
        }
    }
    catch (Exception)
    {
        // Any exception type counts as a successful rejection, as before.
        exceptionThrown = true;
        ApplicationLog.WriteLine(
            "GenBank Parser : Successfully validated the exception:");
        Console.WriteLine(
            "GenBank Parser : Successfully validated the exception:");
    }

    // The failure check lives outside the try block: inside it, the exception raised by
    // Assert.Fail() was itself caught by catch (Exception), so the test could never fail.
    if (!exceptionThrown)
    {
        Assert.Fail("GenBank Parser : Expected an exception while parsing with an invalid configuration for node '" + node + "'.");
    }
}
/// <summary>
/// Expects Parse() on a GenBankParser that was created without any input to throw;
/// the test fails when no exception is raised.
/// </summary>
public void TestGenBankFailureWhenParsingEmpty()
{
    bool exceptionRaised = false;

    try
    {
        ISequenceParser emptyParser = new GenBankParser();
        emptyParser.Parse();
    }
    catch (Exception)
    {
        // all is well with the world
        exceptionRaised = true;
    }

    if (!exceptionRaised)
    {
        Assert.Fail("Failed to throw exception for calling ParseOne on reader containing empty string.");
    }
}
/// <summary>
/// Picks a sequence parser for the given file and returns it already opened on that file.
/// Built-in formats are checked first (FASTA, FASTQ, GenBank, GFF); otherwise the
/// registered parsers in All are searched by file extension.
/// </summary>
/// <param name="fileName">File name for which the parser is required.</param>
/// <returns>The opened parser, or null when the name is null/empty or no parser matches.</returns>
public static ISequenceParser FindParserByFileName(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    if (IsFasta(fileName))
    {
        return new FastAParser(fileName);
    }

    if (IsFastQ(fileName))
    {
        return new FastQParser(fileName);
    }

    if (IsGenBank(fileName))
    {
        return new GenBankParser(fileName);
    }

    bool hasGffExtension = fileName.EndsWith(
        Properties.Resource.GFF_FILEEXTENSION,
        StringComparison.InvariantCultureIgnoreCase);
    if (hasGffExtension)
    {
        return new GffParser(fileName);
    }

    // Fall back to the known parsers so custom parsers added through add-ins are found.
    string extension = Path.GetExtension(fileName);
    if (string.IsNullOrEmpty(extension))
    {
        return null;
    }

    ISequenceParser registeredParser =
        All.FirstOrDefault(candidate => candidate.SupportedFileTypes.Contains(extension));

    // The built-in parsers above receive the file name in their constructor; a parser
    // found by extension is opened explicitly so the caller gets the same result.
    if (registeredParser != null)
    {
        registeredParser.Open(fileName);
    }

    return registeredParser;
}
/// <summary>
/// Smoke test: parses every "*.gbk" file in the GenBank data directory, writes the
/// parsed sequences to a temporary GenBank file and reads that file back. Results are
/// logged; any exception is logged and rethrown.
/// </summary>
public void TestGenBankForManyFiles()
{
    // iterate through the files in input dir, parsing and formatting each; write results
    // to log file
    DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);
    foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
    {
        ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);

        // The using block disposes the parser even when parsing or formatting throws.
        using (ISequenceParser parser = new GenBankParser(fileInfo.FullName))
        {
            try
            {
                IEnumerable<ISequence> seqList = parser.Parse();

                // NOTE(review): the formatter is not closed before the file is read
                // back, exactly as before - confirm Write() releases the file.
                GenBankFormatter formatter = new GenBankFormatter(TempGenBankFileName);
                formatter.Write(seqList.ToList());

                // don't do anything with the output; just make sure reading it doesn't crash
                using (StreamReader reader = new StreamReader(TempGenBankFileName))
                {
                    reader.ReadToEnd();
                }

                parser.Close();
                ApplicationLog.WriteLine("Parse completed successfully." + Environment.NewLine);
            }
            catch (Exception e)
            {
                ApplicationLog.WriteLine("Parse failed:" + Environment.NewLine + e + Environment.NewLine);
                throw;
            }
            finally
            {
                // Remove the temp output on both the success and the failure path.
                File.Delete(TempGenBankFileName);
            }
        }
    }
}
/// <summary>
/// Sets the parser's alphabet to Protein and expects ParseOne on a DNA GenBank file to
/// throw; the test fails when no exception is raised.
/// </summary>
public void TestGenBankWhenUserSetsIncorrectAlphabet()
{
    // parse with an alphabet that does not match the file
    ISequenceParser proteinParser = new GenBankParser();
    proteinParser.Alphabet = Alphabets.Protein;

    bool exceptionRaised = false;
    try
    {
        proteinParser.ParseOne(_singleDnaSeqGenBankFilename);
    }
    catch (Exception)
    {
        // all is well with the world
        exceptionRaised = true;
    }

    if (!exceptionRaised)
    {
        Assert.Fail("Failed to throw exception for trying to create sequence using incorrect alphabet.");
    }
}
/// <summary>
/// Parses the GenBank file at FilePath with Parse(fileName) and validates the first
/// sequence's alphabet, ID, locus/version metadata and sequence text.
/// </summary>
public void GenBankParserValidateParseFileName()
{
    InitializeXmlVariables();

    // parse
    ISequenceParser genBankParser = new GenBankParser();
    ISequence firstSequence = genBankParser.Parse(FilePath).ElementAt(0);

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
    Assert.AreEqual(SeqId, firstSequence.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // test the metadata that is tricky to parse, and will not be tested implicitly by
    // testing the formatting
    var genBankMetadata = (GenBankMetadata)firstSequence.Metadata["GenBank"];
    if (genBankMetadata.Locus.Strand != SequenceStrandType.None)
    {
        Assert.AreEqual(StrandType, genBankMetadata.Locus.Strand.ToString());
    }

    string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
    string actualTopology =
        genBankMetadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedTopology, actualTopology);
    Assert.AreEqual(Div, genBankMetadata.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankMetadata.Locus.Date);
    Assert.AreEqual(Version, genBankMetadata.Version.Version.ToString(null));
    Assert.AreEqual(PrimaryId, genBankMetadata.Version.GiNumber);
    ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // test the sequence string
    Assert.AreEqual(ExpectedSequence, firstSequence.ConvertToString());
    ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the Sequence");
}
/// <summary>
/// Returns the parser with the given name, opened on the given file. Built-in parser
/// names (FastA, FastQ, GenBank, GFF) are checked first; otherwise the registered
/// parsers in All are searched by name.
/// </summary>
/// <param name="fileName">File name for which the parser is required.</param>
/// <param name="parserName">Name of the parser to use.</param>
/// <returns>The opened parser, or null when either argument is null/empty or no parser
/// has that name.</returns>
public static ISequenceParser FindParserByName(string fileName, string parserName)
{
    if (string.IsNullOrEmpty(fileName) || string.IsNullOrEmpty(parserName))
    {
        return null;
    }

    if (parserName == Properties.Resource.FastAName)
    {
        return new FastAParser(fileName);
    }

    if (parserName == Properties.Resource.FastQName)
    {
        return new FastQParser(fileName);
    }

    if (parserName == Properties.Resource.GENBANK_NAME)
    {
        return new GenBankParser(fileName);
    }

    if (parserName == Properties.Resource.GFF_NAME)
    {
        return new GffParser(fileName);
    }

    // Fall back to the known parsers so custom parsers added through add-ins are found.
    ISequenceParser registeredParser = All.FirstOrDefault(candidate => candidate.Name == parserName);

    // The built-in parsers above receive the file name in their constructor; a parser
    // found by name is opened explicitly so the caller gets the same result.
    if (registeredParser != null)
    {
        registeredParser.Open(fileName);
    }

    return registeredParser;
}
/// <summary>
/// Read/write round trip: parses the GenBank file with multiple DBLINK lines, formats
/// the first sequence to a temporary file and checks with Utility.CompareFiles that
/// the written file matches the original.
/// </summary>
public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
{
    string tempFileName = Path.GetTempFileName();
    bool same;

    try
    {
        // parse the first sequence and write it straight back out
        ISequenceParser parser1 = new GenBankParser();
        using (parser1.Open(_genBankFile_WithMultipleDBLines))
        {
            var orgSeq = parser1.Parse().First();

            ISequenceFormatter formatter = new GenBankFormatter();
            using (formatter.Open(tempFileName))
            {
                formatter.Format(orgSeq);
                formatter.Close();
            }
        }

        same = Utility.CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);
    }
    finally
    {
        // Remove the temp file even when parsing, formatting or comparing throws.
        File.Delete(tempFileName);
    }

    Assert.IsTrue(same);
    ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
}
/// <summary>
/// Parses the GenBank file described by the SimpleGenBankPrimaryNode XML
/// entry with the parser alphabet set to Alphabets.Protein, then checks the
/// first sequence's alphabet, ID, locus/version metadata and sequence text
/// against the expected values read from that XML entry.
/// </summary>
public void GenBankParserValidateParseOneWithSpecificFormats()
{
    InitializeXmlVariables();

    // Initialization of xml strings.
    FilePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.FilePathNode);
    AlphabetName = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.AlphabetNameNode);
    SeqId = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.SequenceIdNode);
    StrandTopology = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.StrandTopologyNode);
    StrandType = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.StrandTypeNode);
    Div = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.DivisionNode);
    Version = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.VersionNode);
    SequenceDate = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.DateNode);
    PrimaryId = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.PrimaryIdNode);
    ExpectedSequence = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.ExpectedSequenceNode);

    // parse
    using (ISequenceParser parserObj = new GenBankParser(FilePath))
    {
        parserObj.Alphabet = Alphabets.Protein;

        // Fetch the first sequence once. Calling ElementAt(0) on the
        // IEnumerable for every assertion may enumerate the parse result
        // again each time.
        ISequence seq = parserObj.Parse().ElementAt(0);

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
        Assert.AreEqual(SeqId, seq.ID);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // test the metadata that is tricky to parse, and will not be tested implicitly by
        // testing the formatting
        GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                        metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
        Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
        Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // test the sequence string
        Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Sequence");
        Console.WriteLine(string.Format((IFormatProvider)null,
                                        "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                        ExpectedSequence));
    }
}
/// <summary>
/// Runs GenBankParser.ParseOne on the file referenced by
/// _genBankFile_LocusTokenParserTest. The parsed sequence is not inspected;
/// the only visible check is that the call completes.
/// </summary>
public void TestGenBankLocusTokenParser()
{
    ISequenceParser genBankParser = new GenBankParser();

    // The returned sequence is discarded, as in the original.
    genBankParser.ParseOne(_genBankFile_LocusTokenParserTest);
}
/// <summary>
/// Creates collection items for one query sequence from its saved BLAST report.
/// One item is added per HSP that passes the thresholds in <paramref name="Up"/>;
/// if no HSP of a search record passes (or the record has no hits), a single
/// placeholder item with default facet values is added for that record.
/// </summary>
/// <param name="Up">Settings read here: ProjectDir, the BLAST thresholds, BlastProgram and FacetCategories.</param>
/// <param name="rec">Query sequence; its DisplayID supplies item name/description and rec.ToString() the "QuerySequence" facet.</param>
/// <param name="itemId">Id assigned to the first item created.</param>
/// <param name="seqPos">Number of the BLAST report to read (ProjectDir\xml\&lt;seqPos&gt;.xml); also the "InputOrder" facet value.</param>
/// <param name="collection">Collection that receives the created items.</param>
/// <returns><paramref name="itemId"/> advanced by one for every item added.</returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();
    int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

    // iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;
            int rank = 0;

            // For each hit (the loop simply does not run when there are none)
            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];

                // For each HSP
                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // skip HSPs that fail the user-defined thresholds
                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity) &&
                                            (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                            (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                            (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    rank += 1;

                    // Compare this HSP's score with the first HSP of the next hit.
                    string nextScore;
                    if ((i + 1) < record.Hits.Count())
                    {
                        nextScore = (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score) ? "less than" : "equal";
                    }
                    else
                    {
                        nextScore = "non existent";
                    }

                    // parse GI number from hit
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    // init item
                    string img = "#" + itemId.ToString();
                    Item item = new Item(itemId, img);
                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                    item.Name = headerTokens[0];
                    item.Description = headerTokens[1];

                    // write pairwise alignment
                    writePairwiseAlignment(Up, blastHit, j, itemId);

                    // try to parse the GB record associated with the hit and set facet values to data from BLAST/GB record
                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        CodingSequence bestCds = null;
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);
                        if (gbMeta.Features.CodingSequences.Count > 0)
                        {
                            bestCds = gbMeta.Features.CodingSequences[0];
                        }

                        // NOTE(review): bestCds is never read afterwards, and this loop
                        // dereferences bestItem without a null check. When bestItem is null
                        // and there are two or more coding sequences, the resulting exception
                        // sends the item to the fallback branch below. Kept as-is because the
                        // fallback behavior depends on it; confirm intent before removing.
                        for (int k = 1; k < gbMeta.Features.CodingSequences.Count; k++)
                        {
                            CodingSequence cds = gbMeta.Features.CodingSequences[k];
                            int bestSize = Math.Abs(bestItem.Location.End - bestItem.Location.Start);
                            int cdsSize = Math.Abs(cds.Location.End - cds.Location.Start);
                            if (cdsSize > bestSize)
                            {
                                bestCds = cds;
                            }
                        }

                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet = new Facet();
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, nextScore);
                                    break;
                                case "Annotated":
                                    string value;
                                    if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                    {
                                        value = "top_annotated";
                                    }
                                    else if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                    {
                                        value = "top_unannotated";
                                    }
                                    else if (bestItem != null)
                                    {
                                        value = "annotated";
                                    }
                                    else
                                    {
                                        value = "unannotated";
                                    }
                                    facet = new Facet(f.Name, f.Type, value);
                                    break;
                                default:
                                    facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }
                    }
                    // if anything above failed, init the item w/ default values (similar to the 'no hit' case below)
                    catch
                    {
                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        item.Href = "#";
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet = new Facet();
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, "no");
                                    break;
                                case "Annotated":
                                    string value;
                                    if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                    {
                                        value = "top_annotated";
                                    }
                                    else if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                    {
                                        value = "top_unannotated";
                                    }
                                    else
                                    {
                                        value = "unannotated";
                                    }
                                    facet = new Facet(f.Name, f.Type, value);
                                    break;
                                default:
                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }
                    }

                    // Add item to collection, increment to next item,
                    collection.Items.Add(item);
                    hitsProcessed += 1;
                    itemId += 1;
                }
            }

            // Nothing was emitted for this record (this also covers a record without hits):
            // add one placeholder item carrying default facet values.
            if (hitsProcessed == 0)
            {
                // Init Pivot item
                string img = "#" + itemId.ToString();
                Item item = new Item(itemId, img);
                item.Href = "#";
                string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                item.Name = headerTokens[0];
                item.Description = headerTokens[1];

                // Write pairwise alignment to file.
                writePairwiseAlignment(Up, itemId);

                // Set facet values for each facet category to default values
                foreach (FacetCategory f in Up.FacetCategories)
                {
                    Facet facet = new Facet();
                    switch (f.Name)
                    {
                        case "InputOrder":
                            facet = new Facet(f.Name, f.Type, seqPos);
                            break;
                        case "QuerySequence":
                            facet = new Facet(f.Name, f.Type, rec.ToString());
                            break;
                        default:
                            facet = CreateFacet(f.Name, f.Type, record, item, 0);
                            break;
                    }
                    item.Facets.Add(facet);
                }

                // Add item to collection, increment to next item
                collection.Items.Add(item);
                itemId += 1;
            }
        }
    }

    return itemId;
}
/// <summary>
/// Scans the saved BLAST report for query <paramref name="seqPos"/> and returns the
/// position of the first threshold-passing HSP whose GenBank record yields a
/// non-null result from getBestFeatureItem.
/// </summary>
/// <param name="Up">Settings read here: ProjectDir and the BLAST thresholds.</param>
/// <param name="seqPos">Number of the BLAST report to read (ProjectDir\xml\&lt;seqPos&gt;.xml).</param>
/// <returns>
/// A two-element array { hit index, HSP index }; { -1, -1 } when no such HSP is found.
/// </returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
{
    int[] annotatedIndex = { -1, -1 };

    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();

    // iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;

            // For each hit (the loop simply does not run when there are none)
            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];
                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity) &&
                                            (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                            (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                            (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    // GenBank records are stored as ProjectDir\gb\<id>_<hitStart>_<hitEnd>.gb
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);
                        if (bestItem != null)
                        {
                            annotatedIndex[0] = i;
                            annotatedIndex[1] = j;
                            return annotatedIndex;
                        }
                    }
                    catch
                    {
                        // Best effort: a record that cannot be read or parsed is logged
                        // and treated as not annotated; the scan continues.
                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                    }

                    hitsProcessed += 1;
                }
            }
        }
    }

    return annotatedIndex;
}
/// <summary>
/// Runs GenBankParser.ParseOne on the file referenced by
/// _multipleSeqGenBankFilename. The parsed sequence is not inspected;
/// the only visible check is that the call completes.
/// </summary>
public void TestGenBankWhenParsingOneOfMany()
{
    ISequenceParser genBankParser = new GenBankParser();

    // The returned sequence is discarded, as in the original.
    genBankParser.ParseOne(_multipleSeqGenBankFilename);
}
/// <summary>
/// Filters every BLAST XML report under ProjectDir\xml into a list of GenBank
/// items, then makes sure a parsable GenBank record file exists for each item
/// under ProjectDir\gb, downloading missing or unparsable ones when an internet
/// connection is available. Progress is reported through the progress bar and
/// failures are summarized in message boxes at the end.
/// </summary>
/// <exception cref="DirectoryNotFoundException">ProjectDir\xml does not exist.</exception>
private void DoGenBank()
{
    int progValue = 0;
    Dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Normal, new Action(delegate()
    {
        (CurrentControl as UserControl5).UserControl5Step1.Foreground = System.Windows.Media.Brushes.Black;
        progressBar1.Value = progValue;
    }));

    string inputDir = Up.ProjectDir + "\\xml";
    if (!Directory.Exists(inputDir))
    {
        throw new DirectoryNotFoundException("Directory " + inputDir + " does not exist.");
    }

    // Step 1: collect the GenBank items referenced by hits that pass the filters.
    string[] blastXmlFiles = Directory.GetFiles(inputDir, "*.xml");
    int c = 1;
    Stack<GenBankItem> giList = new Stack<GenBankItem>();
    foreach (string blastFile in blastXmlFiles)
    {
        BlastXmlParser blastParser = new BlastXmlParser();
        progValue = Convert.ToInt32(Math.Round((double)c / blastXmlFiles.Length * 100, 0));
        UpdateProgressBar(progValue, "Filtering results");
        try
        {
            IList<BlastResult> blastResults = blastParser.Parse(blastFile);
            List<GenBankItem> recordGiList = BlastUtil.filter(blastResults, Up.BlastMaxNumHits, Up.BlastMaxEvalue, Up.BlastMinPercentIdentity, Up.BlastMinPercentQueryCoverage);
            foreach (GenBankItem gi in recordGiList)
            {
                giList.Push(gi);
                Debug.WriteLine(gi.HitStart.ToString() + " " + gi.HitEnd.ToString());
            }
        }
        catch
        {
            // A report that cannot be processed is reported and skipped.
            FatalErrorDialog("Cannot parse " + blastFile);
            Debug.WriteLine("Cannot parse " + blastFile);
        }
        c += 1;
    }

    // Step 2: make sure a parsable record file exists for every collected item.
    progValue = 0;
    Dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Normal, new Action(delegate()
    {
        (CurrentControl as UserControl5).UserControl5Step2.Foreground = System.Windows.Media.Brushes.Black;
        progressBar1.Value = progValue;
    }));

    int totalGi = giList.Count;
    GenBankParser genkBankParser = new GenBankParser();
    int unParsableCount = 0;
    int notDownloadedCount = 0;
    string unParsableGIs = "";
    string notDownloadedGIs = "";
    bool isConnected = true;
    if (!IsConnectedToInternet())
    {
        isConnected = false;
        MessageBox.Show("Your internet connection appears to be down. As a result, missing GenBank records will not be downloaded.");
    }

    while (giList.Count > 0)
    {
        GenBankItem gitem = giList.Pop();
        progValue = Convert.ToInt32(Math.Round(((totalGi - giList.Count) / (double)totalGi) * 100, 0));
        UpdateProgressBar(progValue, "Downloading GenBank records");

        string outFilename = Up.ProjectDir + "\\gb\\" + gitem.Id;
        outFilename += "_" + gitem.HitStart.ToString();
        outFilename += "_" + gitem.HitEnd.ToString();
        outFilename += ".gb";

        // A record needs (re-)downloading when its file is missing or does not parse.
        bool needsDownload = !File.Exists(outFilename);
        if (!needsDownload)
        {
            try
            {
                genkBankParser.ParseOne(outFilename);
            }
            catch
            {
                needsDownload = true;
            }
        }

        if (!needsDownload)
        {
            continue;
        }

        if (!isConnected)
        {
            notDownloadedCount += 1;
            notDownloadedGIs += gitem.Id + ",";
            continue;
        }

        string url = GetGenbankUrl(gitem);

        // Dispose the client after each download instead of leaking one per item.
        // (The original also set wc.Proxy = null after a failure, but that client
        // was never used again, so the assignment had no effect and is dropped.)
        using (WebClient wc = new WebClient())
        {
            try
            {
                wc.DownloadFile(url, outFilename);
                Thread.Sleep(1000); // pause between consecutive downloads
            }
            catch
            {
                // NOTE(review): a failed download is queued again without any retry
                // limit, so a persistently failing item keeps this loop running and is
                // added to the unparsable list on every attempt. Behavior kept as-is;
                // confirm the intended retry policy.
                giList.Push(gitem);
            }
        }

        // Verify that whatever is on disk now parses.
        try
        {
            genkBankParser.ParseOne(outFilename);
        }
        catch
        {
            unParsableCount += 1;
            unParsableGIs += gitem.Id + ",";
        }
    }

    if (notDownloadedCount > 0)
    {
        MessageBox.Show("Error downloading GenBank records: " + notDownloadedGIs + ".\r\nThis is likely caused by an interruption in the internet connection. Re-attempt the download by repeating this step.\r\n");
    }

    if (unParsableCount > 0)
    {
        MessageBox.Show("Error parsing GenBank records: " + unParsableGIs + ".\r\nThis is likely due to an unsupported field in the GenBank record. Contact the MBF development team at http://mbf.codeplex.com, and include one of the GI numbers in the bug report.\r\nYou can copy this message to the clipboard using Ctrl-C.\r\n");
    }
}
/// <summary>
/// Scans the saved BLAST report for query <paramref name="seqPos"/> and returns the
/// position of the first threshold-passing HSP whose GenBank record yields a
/// non-null result from getBestFeatureItem.
/// </summary>
/// <param name="Up">Settings read here: ProjectDir and the BLAST thresholds.</param>
/// <param name="seqPos">Number of the BLAST report to read (ProjectDir\xml\&lt;seqPos&gt;.xml).</param>
/// <returns>
/// A two-element array { hit index, HSP index }; { -1, -1 } when no such HSP is found.
/// </returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
{
    int[] annotatedIndex = { -1, -1 };

    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();

    // iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;

            // For each hit (the loop simply does not run when there are none)
            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];
                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity) &&
                                            (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                            (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                            (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    // GenBank records are stored as ProjectDir\gb\<id>_<hitStart>_<hitEnd>.gb
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);
                        if (bestItem != null)
                        {
                            annotatedIndex[0] = i;
                            annotatedIndex[1] = j;
                            return annotatedIndex;
                        }
                    }
                    catch
                    {
                        // Best effort: a record that cannot be read or parsed is logged
                        // and treated as not annotated; the scan continues.
                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                    }

                    hitsProcessed += 1;
                }
            }
        }
    }

    return annotatedIndex;
}
/// <summary>
/// Creates collection items for one query sequence from its saved BLAST report.
/// One item is added per HSP that passes the thresholds in <paramref name="Up"/>;
/// if no HSP of a search record passes (or the record has no hits), a single
/// placeholder item with default facet values is added for that record.
/// </summary>
/// <param name="Up">Settings read here: ProjectDir, the BLAST thresholds, BlastProgram and FacetCategories.</param>
/// <param name="rec">Query sequence; its DisplayID supplies item name/description and rec.ToString() the "QuerySequence" facet.</param>
/// <param name="itemId">Id assigned to the first item created.</param>
/// <param name="seqPos">Number of the BLAST report to read (ProjectDir\xml\&lt;seqPos&gt;.xml); also the "InputOrder" facet value.</param>
/// <param name="collection">Collection that receives the created items.</param>
/// <returns><paramref name="itemId"/> advanced by one for every item added.</returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();
    int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

    // iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;
            int rank = 0;

            // For each hit (the loop simply does not run when there are none)
            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];

                // For each HSP
                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // skip HSPs that fail the user-defined thresholds
                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity) &&
                                            (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                            (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                            (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    rank += 1;

                    // Compare this HSP's score with the first HSP of the next hit.
                    string nextScore;
                    if ((i + 1) < record.Hits.Count())
                    {
                        nextScore = (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score) ? "less than" : "equal";
                    }
                    else
                    {
                        nextScore = "non existent";
                    }

                    // parse GI number from hit
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    // init item
                    string img = "#" + itemId.ToString();
                    Item item = new Item(itemId, img);
                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                    item.Name = headerTokens[0];
                    item.Description = headerTokens[1];

                    // write pairwise alignment
                    writePairwiseAlignment(Up, blastHit, j, itemId);

                    // try to parse the GB record associated with the hit and set facet values to data from BLAST/GB record
                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        CodingSequence bestCds = null;
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);
                        if (gbMeta.Features.CodingSequences.Count > 0)
                        {
                            bestCds = gbMeta.Features.CodingSequences[0];
                        }

                        // NOTE(review): bestCds is never read afterwards, and this loop
                        // dereferences bestItem without a null check. When bestItem is null
                        // and there are two or more coding sequences, the resulting exception
                        // sends the item to the fallback branch below. Kept as-is because the
                        // fallback behavior depends on it; confirm intent before removing.
                        for (int k = 1; k < gbMeta.Features.CodingSequences.Count; k++)
                        {
                            CodingSequence cds = gbMeta.Features.CodingSequences[k];
                            int bestSize = Math.Abs(bestItem.Location.End - bestItem.Location.Start);
                            int cdsSize = Math.Abs(cds.Location.End - cds.Location.Start);
                            if (cdsSize > bestSize)
                            {
                                bestCds = cds;
                            }
                        }

                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet = new Facet();
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, nextScore);
                                    break;
                                case "Annotated":
                                    string value;
                                    if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                    {
                                        value = "top_annotated";
                                    }
                                    else if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                    {
                                        value = "top_unannotated";
                                    }
                                    else if (bestItem != null)
                                    {
                                        value = "annotated";
                                    }
                                    else
                                    {
                                        value = "unannotated";
                                    }
                                    facet = new Facet(f.Name, f.Type, value);
                                    break;
                                default:
                                    facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }
                    }
                    // if anything above failed, init the item w/ default values (similar to the 'no hit' case below)
                    catch
                    {
                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        item.Href = "#";
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet = new Facet();
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, "no");
                                    break;
                                case "Annotated":
                                    string value;
                                    if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                    {
                                        value = "top_annotated";
                                    }
                                    else if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                    {
                                        value = "top_unannotated";
                                    }
                                    else
                                    {
                                        value = "unannotated";
                                    }
                                    facet = new Facet(f.Name, f.Type, value);
                                    break;
                                default:
                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }
                    }

                    // Add item to collection, increment to next item,
                    collection.Items.Add(item);
                    hitsProcessed += 1;
                    itemId += 1;
                }
            }

            // Nothing was emitted for this record (this also covers a record without hits):
            // add one placeholder item carrying default facet values.
            if (hitsProcessed == 0)
            {
                // Init Pivot item
                string img = "#" + itemId.ToString();
                Item item = new Item(itemId, img);
                item.Href = "#";
                string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                item.Name = headerTokens[0];
                item.Description = headerTokens[1];

                // Write pairwise alignment to file.
                writePairwiseAlignment(Up, itemId);

                // Set facet values for each facet category to default values
                foreach (FacetCategory f in Up.FacetCategories)
                {
                    Facet facet = new Facet();
                    switch (f.Name)
                    {
                        case "InputOrder":
                            facet = new Facet(f.Name, f.Type, seqPos);
                            break;
                        case "QuerySequence":
                            facet = new Facet(f.Name, f.Type, rec.ToString());
                            break;
                        default:
                            facet = CreateFacet(f.Name, f.Type, record, item, 0);
                            break;
                    }
                    item.Facets.Add(facet);
                }

                // Add item to collection, increment to next item
                collection.Items.Add(item);
                itemId += 1;
            }
        }
    }

    return itemId;
}
/// <summary>
/// Loads the expected values of the SimpleGenBankPrimaryNode XML entry, parses
/// the referenced GenBank file with the parser's Alphabet set to
/// Alphabets.Protein and its Encoding set to NcbiEAAEncoding.Instance, and
/// compares the parsed sequence's properties, locus/version metadata and
/// sequence text with the expected values.
/// </summary>
public void GenBankParserValidateParseOneWithSpecificFormats()
{
    InitializeXmlVariables();

    // Expected values for this test case come from the XML configuration.
    var xml = _utilityObj._xmlUtil;
    FilePath = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.FilePathNode);
    AlphabetName = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.AlphabetNameNode);
    MolType = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.MoleculeTypeNode);
    IsSequenceReadOnly = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.IsReadOnlyNode);
    SeqId = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.SequenceIdNode);
    StrandTopology = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.StrandTopologyNode);
    StrandType = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.StrandTypeNode);
    Div = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.DivisionNode);
    Version = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.VersionNode);
    SequenceDate = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.DateNode);
    PrimaryId = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.PrimaryIdNode);
    ExpectedSequence = xml.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.ExpectedSequenceNode);

    // Configure the parser explicitly, then parse a single sequence.
    BasicSequenceParser configuredParser = new GenBankParser();
    configuredParser.Alphabet = Alphabets.Protein;
    configuredParser.Encoding = NcbiEAAEncoding.Instance;
    ISequence parsed = configuredParser.ParseOne(FilePath);

    // Non-metadata properties.
    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), parsed.Alphabet);
    Assert.AreEqual(Utility.GetMoleculeType(MolType), parsed.MoleculeType);
    Assert.AreEqual(SeqId, parsed.DisplayID);
    Assert.AreEqual(SeqId, parsed.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // Metadata that is tricky to parse and is not covered implicitly by
    // the formatting tests.
    GenBankMetadata genBank = (GenBankMetadata)parsed.Metadata["GenBank"];
    if (SequenceStrandType.None != genBank.Locus.Strand)
    {
        // The strand is only compared when the parsed locus declares one.
        Assert.AreEqual(StrandType, genBank.Locus.Strand.ToString());
    }

    string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
    string actualTopology = genBank.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedTopology, actualTopology);
    Assert.AreEqual(Div, genBank.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBank.Locus.Date);
    Assert.AreEqual(Version, genBank.Version.Version.ToString((IFormatProvider)null));
    Assert.AreEqual(PrimaryId, genBank.Version.GINumber);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // Sequence text.
    Assert.AreEqual(ExpectedSequence, parsed.ToString());
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Sequence");
    string consoleMessage = string.Format((IFormatProvider)null,
        "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
        ExpectedSequence);
    Console.WriteLine(consoleMessage);
}
/// <summary>
/// Builds a Sequence from the expected sequence text plus the ID, molecule
/// type and GenBank metadata of the first parsed sequence of FilePath, writes
/// it to Constants.GenBankTempFileName with GenBankFormatter, parses that file
/// back and validates the result against the expected values.
/// </summary>
public void GenBankFormatterValidateFormatFilePath()
{
    InitializeXmlVariables();

    // The temporary output file is removed in the finally block so that a
    // failing assertion does not leave it behind for later test runs.
    try
    {
        ISequenceParser parserObj = new GenBankParser();
        IList<ISequence> seqList1 = parserObj.Parse(FilePath);

        string expectedUpdatedSequence =
            ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
        Sequence orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
        orgSeq.ID = seqList1[0].ID;
        orgSeq.MoleculeType = seqList1[0].MoleculeType;
        orgSeq.Metadata.Add("GenBank", (GenBankMetadata)seqList1[0].Metadata["GenBank"]);

        ISequenceFormatter formatter = new GenBankFormatter();
        formatter.Format(orgSeq, Constants.GenBankTempFileName);

        // parse
        parserObj = new GenBankParser();
        IList<ISequence> seqList = parserObj.Parse(Constants.GenBankTempFileName);
        ISequence seq = seqList[0];

        // test the non-metadata properties
        if (0 == string.Compare(IsSequenceReadOnly, "true", false, CultureInfo.CurrentCulture))
        {
            Assert.IsTrue(seq.IsReadOnly);
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the ReadOnly Property");
        }

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
        Assert.AreEqual(Utility.GetMoleculeType(MolType), seq.MoleculeType);
        Assert.AreEqual(SeqId, seq.DisplayID);
        Assert.AreEqual(SeqId, seq.ID);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // test the metadata that is tricky to parse, and will not be tested implicitly by
        // testing the formatting
        // NOTE(review): the metadata checked here is taken from orgSeq (the sequence
        // that was written), not from seq (the one read back) - confirm this is intended.
        GenBankMetadata metadata = (GenBankMetadata)orgSeq.Metadata["GenBank"];
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                        metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
        Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
        Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // test the sequence string
        Assert.AreEqual(ExpectedSequence, seq.ToString());
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the Sequence");
        Console.WriteLine(string.Format((IFormatProvider)null,
                                        "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
                                        ExpectedSequence));
    }
    finally
    {
        File.Delete(Constants.GenBankTempFileName);
    }
}
/// <summary>
/// Exercises <c>FeatureItem.GetSubSequence</c> (against a parsed GenBank file and
/// against a hand-built location that references a second sequence) and
/// <c>FeatureItem.GetSubFeatures</c> (against a hand-built feature list).
/// </summary>
public void TestFeatureItem()
{
    GenBankParser gbParser = new GenBankParser();
    string dataFolder = @"TestUtils\GenBank";
    ISequence parsedSeq = gbParser.ParseOne(dataFolder + @"\BK000016-tpa.gbk");
    GenBankMetadata gbMetadata = parsedSeq.Metadata["GenBank"] as GenBankMetadata;

    #region Test GetSubSequence Method

    // The first two features of the parsed file must yield the same text as the
    // corresponding range of the parsed sequence.
    for (int featureIndex = 0; featureIndex < 2; featureIndex++)
    {
        FeatureItem feature = gbMetadata.Features.All[featureIndex];
        ISequence extracted = feature.GetSubSequence(parsedSeq);

        // Location.Start is shifted down by one to get the range offset; the second
        // argument passed to Range is End minus that offset.
        int offset = feature.Location.Start - 1;
        int length = feature.Location.End - offset;

        Assert.AreEqual(extracted.ToString(), parsedSeq.Range(offset, length).ToString());
    }

    // A location string must survive a GetLocation / GetLocationString round trip.
    ISequence dnaSeq = new Sequence(Alphabets.DNA, "ACGTAAAGGT");
    Sequence referencedSeq = new Sequence(Alphabets.DNA, "AAAAATTTT");
    LocationBuilder builder = new LocationBuilder();
    ILocation joinedLocation = builder.GetLocation("join(complement(4..8),Ref1:5..7)");
    Assert.AreEqual("join(complement(4..8),Ref1:5..7)", builder.GetLocationString(joinedLocation));

    // Resolve the location against dnaSeq, supplying "Ref1" through the lookup table.
    FeatureItem joinedFeature = new FeatureItem("Feature1", joinedLocation);
    Dictionary<string, ISequence> referencedSequences = new Dictionary<string, ISequence>
    {
        { "Ref1", referencedSeq }
    };
    ISequence joinedResult = joinedFeature.GetSubSequence(dnaSeq, referencedSequences);
    Assert.AreEqual("ATTTCATT", joinedResult.ToString());

    #endregion

    #region Test GetSubFeatures Method

    SequenceFeatures featureSet = new SequenceFeatures();

    // Each feature is paired, by index, with the number of sub-features expected for it.
    FeatureItem[] candidates =
    {
        new FeatureItem("Source", "1..1509"),
        new FeatureItem("mRNA", "join(10..567,789..1320)"),
        new FeatureItem("CDS", "join(54..567,789..1254)"),
        new FeatureItem("Exon", "10..567"),
        new FeatureItem("Intron", "568..788"),
        new FeatureItem("Exon", "789..1320")
    };
    int[] expectedSubFeatureCounts = { 5, 4, 1, 0, 0, 0 };

    foreach (FeatureItem candidate in candidates)
    {
        featureSet.All.Add(candidate);
    }

    for (int i = 0; i < candidates.Length; i++)
    {
        List<FeatureItem> children = candidates[i].GetSubFeatures(featureSet);
        Assert.AreEqual(expectedSubFeatureCounts[i], children.Count);
    }

    #endregion
}