Пример #1
0
        public void GenBankFormatterValidateWriteWithFilePath()
        {
            InitializeXmlVariables();
            using (ISequenceParser parserObj = new GenBankParser(FilePath))
            {
                IEnumerable <ISequence> seqList1 = parserObj.Parse();
                string tempFileName            = System.IO.Path.GetTempFileName();
                string expectedUpdatedSequence =
                    ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                Sequence orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
                orgSeq.ID = seqList1.ElementAt(0).ID;
                orgSeq.Metadata.Add("GenBank", (GenBankMetadata)seqList1.ElementAt(0).Metadata["GenBank"]);
                using (ISequenceFormatter formatter = new GenBankFormatter(tempFileName))
                {
                    formatter.Write(orgSeq);
                    formatter.Close();

                    // parse
                    ISequenceParser         parserObjFromFile = new GenBankParser(tempFileName);
                    IEnumerable <ISequence> seqList           =
                        parserObjFromFile.Parse();
                    ISequence seq = seqList.ElementAt(0);
                    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                    Assert.AreEqual(SeqId, seq.ID);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                    // test the metadata that is tricky to parse, and will not be tested implicitly by
                    // testing the formatting
                    GenBankMetadata metadata =
                        (GenBankMetadata)orgSeq.Metadata["GenBank"];
                    if (metadata.Locus.Strand != SequenceStrandType.None)
                    {
                        Assert.AreEqual(StrandType,
                                        metadata.Locus.Strand.ToString());
                    }
                    Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                    metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                    Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                    Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                    metadata.Locus.Date);
                    Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                    // test the sequence string
                    Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Sequence");
                    Console.WriteLine(string.Format((IFormatProvider)null,
                                                    "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
                                                    ExpectedSequence));
                    parserObjFromFile.Close();
                    parserObjFromFile.Dispose();
                    File.Delete(tempFileName);
                }
            }
        }
Пример #2
0
        public void GenBankFeatures()
        {
            // parse
            ISequenceParser       parser   = new GenBankParser(_singleProteinSeqGenBankFilename);
            ISequence             seq      = parser.Parse().FirstOrDefault();
            GenBankMetadata       metadata = seq.Metadata["GenBank"] as GenBankMetadata;
            List <CodingSequence> CDS      = metadata.Features.CodingSequences;

            Assert.AreEqual(CDS.Count, 3);
            Assert.AreEqual(CDS[0].DatabaseCrossReference.Count, 1);
            Assert.AreEqual(CDS[0].GeneSymbol, string.Empty);
            Assert.AreEqual(metadata.Features.GetFeatures("source").Count, 1);
            Assert.IsFalse(CDS[0].Pseudo);
            Assert.AreEqual(metadata.GetFeatures(1, 109).Count, 2);
            Assert.AreEqual(metadata.GetFeatures(1, 10).Count, 2);
            Assert.AreEqual(metadata.GetFeatures(10, 100).Count, 2);
            Assert.AreEqual(metadata.GetFeatures(120, 150).Count, 2);
            Assert.AreEqual(metadata.GetCitationsReferredInFeatures().Count, 0);
            parser.Close();
            parser.Dispose();
            ISequenceParser parser1 = new GenBankParser(_genBankDataPath + @"\NC_001284.gbk");
            ISequence       seq1    = parser1.Parse().FirstOrDefault();

            metadata = seq1.Metadata["GenBank"] as GenBankMetadata;
            Assert.AreEqual(metadata.Features.All.Count, 743);
            Assert.AreEqual(metadata.Features.CodingSequences.Count, 117);
            Assert.AreEqual(metadata.Features.Exons.Count, 32);
            Assert.AreEqual(metadata.Features.Introns.Count, 22);
            Assert.AreEqual(metadata.Features.Genes.Count, 60);
            Assert.AreEqual(metadata.Features.MiscFeatures.Count, 455);
            Assert.AreEqual(metadata.Features.Promoters.Count, 17);
            Assert.AreEqual(metadata.Features.TransferRNAs.Count, 21);
            Assert.AreEqual(metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count, 117);
            Assert.AreEqual(metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count, 117);
            ISequence seqTemp = metadata.Features.CodingSequences[0].GetTranslation();

            byte[] tempData = new byte[seqTemp.Count];
            for (int i = 0; i < seqTemp.Count; i++)
            {
                tempData[i] = seqTemp[i];
            }
            string sequenceInString = ASCIIEncoding.ASCII.GetString(tempData);

            Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), sequenceInString.Trim('"'));
            Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
        }
Пример #3
0
        public void TestGenBankForManyFiles()
        {
            // parser and formatter will be used for all files in input dir

            // iterate through the files in input dir, parsing and formatting each; write results
            // to log file
            DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);

            foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);
                ISequenceParser parser = new GenBankParser(fileInfo.FullName);


                try
                {
                    IEnumerable <ISequence> seqList = parser.Parse();
                    // don't do anything with it; just make sure it doesn't crash
                    ISequenceFormatter formatter = new GenBankFormatter(TempGenBankFileName);
                    string             actual    = string.Empty;

                    (formatter as GenBankFormatter).Write(seqList.ToList());

                    using (StreamReader reader = new StreamReader(TempGenBankFileName))
                    {
                        actual = reader.ReadToEnd();
                    }
                    File.Delete(TempGenBankFileName);
                    parser.Close();
                    parser.Dispose();

                    ApplicationLog.WriteLine("Parse completed successfully." + Environment.NewLine);
                }
                catch (Exception e)
                {
                    ApplicationLog.WriteLine("Parse failed:" + Environment.NewLine + e + Environment.NewLine);
                    throw;
                }
            }
        }