/// <summary>
/// Parses the GenBank file at FilePath, builds a new sequence from ExpectedSequence
/// (with line breaks and spaces removed) that carries the parsed ID and "GenBank"
/// metadata, writes it to a temporary file with GenBankFormatter, parses that file
/// back and validates the alphabet, ID, locus/version metadata and sequence text.
/// The temporary file is always deleted and the second parser always disposed,
/// even when an assertion fails.
/// </summary>
public void GenBankFormatterValidateWriteWithFilePath()
{
    InitializeXmlVariables();

    using (ISequenceParser parserObj = new GenBankParser(FilePath))
    {
        // Take the first parsed sequence once instead of calling ElementAt(0)
        // on the parse result for every property that is read from it.
        ISequence parsedSeq = parserObj.Parse().ElementAt(0);

        string tempFileName = System.IO.Path.GetTempFileName();
        try
        {
            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");

            Sequence orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
            orgSeq.ID = parsedSeq.ID;
            orgSeq.Metadata.Add("GenBank", (GenBankMetadata)parsedSeq.Metadata["GenBank"]);

            using (ISequenceFormatter formatter = new GenBankFormatter(tempFileName))
            {
                formatter.Write(orgSeq);

                // Close the formatter before the file is read back.
                formatter.Close();

                // parse
                using (ISequenceParser parserObjFromFile = new GenBankParser(tempFileName))
                {
                    IEnumerable<ISequence> seqList = parserObjFromFile.Parse();
                    ISequence seq = seqList.ElementAt(0);

                    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                    Assert.AreEqual(SeqId, seq.ID);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                    // test the metadata that is tricky to parse, and will not be tested implicitly by
                    // testing the formatting
                    // NOTE(review): these checks read the metadata attached to orgSeq, not the
                    // metadata of the sequence parsed back from the temp file - confirm this is intended.
                    GenBankMetadata metadata = (GenBankMetadata)orgSeq.Metadata["GenBank"];
                    if (metadata.Locus.Strand != SequenceStrandType.None)
                    {
                        Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                    }

                    Assert.AreEqual(
                        StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                        metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                    Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                    Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                    Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                    // test the sequence string
                    Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Sequence");
                    Console.WriteLine(string.Format((IFormatProvider)null,
                        "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
                        ExpectedSequence));

                    // Explicit Close kept from the original call sequence; the using
                    // block disposes the parser when an assertion above throws.
                    parserObjFromFile.Close();
                }
            }
        }
        finally
        {
            // Remove the temp file whether or not the validation succeeded.
            File.Delete(tempFileName);
        }
    }
}
/// <summary>
/// The execution method for the activity. Opens the file named by InputFile with a
/// GenBankParser, parses it and stores the resulting sequences in SequenceList.
/// </summary>
/// <param name="executionContext">The execution context.</param>
/// <returns>The execution status; ActivityExecutionStatus.Closed once parsing has finished.</returns>
protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
{
    String inputFileName = InputFile;

    // The using block releases the parser even when Open or Parse throws;
    // previously Close was skipped on any failure.
    using (GenBankParser parser = new GenBankParser())
    {
        parser.Open(inputFileName);

        // ToList materializes the sequences while the parser is still open.
        SequenceList = parser.Parse().ToList();
        parser.Close();
    }

    return ActivityExecutionStatus.Closed;
}
/// <summary>
/// Validates GenBank feature parsing against two files: the single-protein file named by
/// _singleProteinSeqGenBankFilename (coding sequences, cross references, location-based
/// feature lookups) and NC_001284.gbk under _genBankDataPath (per-type feature counts and
/// the translation of the first coding sequence).
/// Both parsers are released through using blocks, including when an assertion fails.
/// </summary>
public void GenBankFeatures()
{
    // parse
    using (ISequenceParser parser = new GenBankParser(_singleProteinSeqGenBankFilename))
    {
        ISequence seq = parser.Parse().FirstOrDefault();
        Assert.IsNotNull(seq);

        GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;
        Assert.IsNotNull(metadata);

        List<CodingSequence> codingSequences = metadata.Features.CodingSequences;

        // Expected value goes first so a failure message reports the right sides.
        Assert.AreEqual(3, codingSequences.Count);
        Assert.AreEqual(1, codingSequences[0].DatabaseCrossReference.Count);
        Assert.AreEqual(string.Empty, codingSequences[0].GeneSymbol);
        Assert.AreEqual(1, metadata.Features.GetFeatures("source").Count);
        Assert.IsFalse(codingSequences[0].Pseudo);
        Assert.AreEqual(2, metadata.GetFeatures(1, 109).Count);
        Assert.AreEqual(2, metadata.GetFeatures(1, 10).Count);
        Assert.AreEqual(2, metadata.GetFeatures(10, 100).Count);
        Assert.AreEqual(2, metadata.GetFeatures(120, 150).Count);
        Assert.AreEqual(0, metadata.GetCitationsReferredInFeatures().Count);

        // Explicit Close kept from the original call sequence; the using block
        // disposes the parser when an assertion above throws.
        parser.Close();
    }

    // The second parser was previously never closed or disposed.
    using (ISequenceParser parser1 = new GenBankParser(_genBankDataPath + @"\NC_001284.gbk"))
    {
        ISequence seq1 = parser1.Parse().FirstOrDefault();
        Assert.IsNotNull(seq1);

        GenBankMetadata metadata = seq1.Metadata["GenBank"] as GenBankMetadata;
        Assert.IsNotNull(metadata);

        Assert.AreEqual(743, metadata.Features.All.Count);
        Assert.AreEqual(117, metadata.Features.CodingSequences.Count);
        Assert.AreEqual(32, metadata.Features.Exons.Count);
        Assert.AreEqual(22, metadata.Features.Introns.Count);
        Assert.AreEqual(60, metadata.Features.Genes.Count);
        Assert.AreEqual(455, metadata.Features.MiscFeatures.Count);
        Assert.AreEqual(17, metadata.Features.Promoters.Count);
        Assert.AreEqual(21, metadata.Features.TransferRNAs.Count);
        Assert.AreEqual(
            117,
            metadata.Features.All.FindAll(feature => feature.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);
        Assert.AreEqual(117, metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count);

        // Copy the translated sequence into a byte buffer so it can be decoded as ASCII text.
        ISequence seqTemp = metadata.Features.CodingSequences[0].GetTranslation();
        byte[] tempData = new byte[seqTemp.Count];
        for (int i = 0; i < seqTemp.Count; i++)
        {
            tempData[i] = seqTemp[i];
        }

        string sequenceInString = ASCIIEncoding.ASCII.GetString(tempData);

        // The translation stored on the feature is the expected value; surrounding
        // double quotes are trimmed from both sides before comparing.
        Assert.AreEqual(
            metadata.Features.CodingSequences[0].Translation.Trim('"'),
            sequenceInString.Trim('"'));
        Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
    }
}
/// <summary>
/// Parses every "*.gbk" file in the _genBankDataPath directory, writes the parsed
/// sequences to TempGenBankFileName with GenBankFormatter and reads the result back.
/// Nothing is asserted about the content; the test only checks that no step throws.
/// Progress and failures are written to ApplicationLog, and failures are rethrown.
/// </summary>
public void TestGenBankForManyFiles()
{
    // iterate through the files in input dir, parsing and formatting each; write results
    // to log file
    DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);
    foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
    {
        ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);

        // The parser is released for every file, whether or not the round trip succeeds.
        using (ISequenceParser parser = new GenBankParser(fileInfo.FullName))
        {
            try
            {
                IEnumerable<ISequence> seqList = parser.Parse();

                try
                {
                    // Close and dispose the formatter before the output file is read
                    // back and deleted; previously it was never released.
                    using (GenBankFormatter formatter = new GenBankFormatter(TempGenBankFileName))
                    {
                        formatter.Write(seqList.ToList());
                        formatter.Close();
                    }

                    // don't do anything with it; just make sure it doesn't crash
                    using (StreamReader reader = new StreamReader(TempGenBankFileName))
                    {
                        reader.ReadToEnd();
                    }
                }
                finally
                {
                    // Remove the output file even when formatting or reading failed.
                    File.Delete(TempGenBankFileName);
                }

                parser.Close();
                ApplicationLog.WriteLine("Parse completed successfully." + Environment.NewLine);
            }
            catch (Exception e)
            {
                ApplicationLog.WriteLine("Parse failed:" + Environment.NewLine + e + Environment.NewLine);
                throw;
            }
        }
    }
}