Beispiel #1
0
        /// <summary>
        /// Writes a GenBank sequence to a temporary file through a <see cref="StreamWriter"/>,
        /// parses the file back, and validates the alphabet, ID, locus/version metadata and
        /// the sequence string of the re-read record.
        /// </summary>
        public void GenBankFormatterValidateWriteUsingStream()
        {
            InitializeXmlVariables();

            string tempFileName = System.IO.Path.GetTempFileName();
            try
            {
                // Build the sequence to write from the expected sequence text plus the
                // GenBank metadata and ID of the first record in the source file.
                using (ISequenceParser parser1 = new GenBankParser(FilePath))
                {
                    // Read the first record once; the parse result is an IEnumerable and
                    // every ElementAt call would enumerate it again.
                    ISequence parsedSeq = parser1.Parse().ElementAt(0);

                    GenBankMetadata metadata = null;
                    ISequence seq = null;
                    string expectedUpdatedSequence =
                        ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                    Sequence orgSeq =
                        new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
                    orgSeq.Metadata.Add("GenBank", (GenBankMetadata)parsedSeq.Metadata["GenBank"]);
                    orgSeq.ID = parsedSeq.ID;

                    // Write the sequence through a caller-supplied stream writer.
                    using (ISequenceFormatter formatter = new GenBankFormatter())
                    {
                        using (StreamWriter writer = new StreamWriter(tempFileName))
                        {
                            formatter.Open(writer);
                            formatter.Write(orgSeq);
                        }
                    }

                    // Parse the written file back and validate it.
                    using (GenBankParser parserObj = new GenBankParser(tempFileName))
                    {
                        IEnumerable<ISequence> seqList = parserObj.Parse();
                        seq = seqList.ElementAt(0);
                        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                        Assert.AreEqual(SeqId, seq.ID);
                        ApplicationLog.WriteLine(
                            "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                        // test the metadata that is tricky to parse, and will not be tested implicitly by
                        // testing the formatting
                        metadata = (GenBankMetadata)seq.Metadata["GenBank"];
                        if (metadata.Locus.Strand != SequenceStrandType.None)
                        {
                            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                        }
                    }

                    Assert.AreEqual(
                        StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                        metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                    Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                    Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                    Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                    // test the sequence string
                    Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
                    ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
                    Console.WriteLine(string.Format((IFormatProvider)null, "GenBank Formatter BVT: Successfully validated the Sequence '{0}'", ExpectedSequence));
                }
            }
            finally
            {
                // Remove the temp file even when an assertion or the parser/formatter throws.
                File.Delete(tempFileName);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Parses the first sequence of the single-protein GenBank file with the parser's
        /// alphabet set explicitly, formats it to the temp GenBank file, and compares the
        /// whitespace-normalized output with the expected text.
        /// </summary>
        public void TestGenBankWhenUserSetsProteinAlphabet()
        {
            // Set the alphabet on the parser before parsing.
            // NOTE(review): the test name mentions a protein alphabet but Alphabets.DNA is
            // set here - confirm which one is intended.
            ISequenceParser parser = new GenBankParser();
            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

            // FirstOrDefault yields null for an empty parse result; fail with an assertion
            // instead of a NullReferenceException on the next line.
            Assert.IsNotNull(seq);
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            string actual;
            try
            {
                // format
                ISequenceFormatter formatter = new GenBankFormatter();
                using (formatter.Open(TempGenBankFileName))
                {
                    formatter.Format(seq);
                }

                actual = File.ReadAllText(TempGenBankFileName);
            }
            finally
            {
                // Clean up the temp file even if formatting or reading fails.
                File.Delete(TempGenBankFileName);
            }

            // test the formatting
            Assert.AreEqual(Utility.CleanupWhiteSpace(_singleProteinSeqGenBankFileExpectedOutput),
                            Utility.CleanupWhiteSpace(actual));
        }
Beispiel #3
0
        /// <summary>
        /// Smoke test: parses every "*.gbk" file in the GenBank data directory, formats the
        /// parsed sequences to the temp GenBank file and reads that file back. No content
        /// assertions are made; the test passes when no step throws.
        /// </summary>
        public void TestGenBankForManyFiles()
        {
            DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);

            foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);

                IEnumerable<ISequence> seqList = new GenBankParser().Parse(fileInfo.FullName);

                try
                {
                    // Typed as GenBankFormatter so the list overload of Format is called
                    // directly, without an unchecked 'as' cast.
                    GenBankFormatter formatter = new GenBankFormatter();
                    using (formatter.Open(TempGenBankFileName))
                    {
                        formatter.Format(seqList.ToList());
                    }

                    // Read the output back only to prove the written file is readable;
                    // the text itself is not inspected.
                    using (var reader = new StreamReader(TempGenBankFileName))
                    {
                        reader.ReadToEnd();
                    }
                }
                finally
                {
                    // Always remove the temp file so a failure on one input does not leave
                    // stale output behind.
                    File.Delete(TempGenBankFileName);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// The execution method for the activity. Opens <c>OutputFile</c> with a GenBank
        /// formatter, writes every sequence in <c>SequenceList</c> (when set) followed by
        /// <c>Sequence</c> (when set), and closes the formatter.
        /// </summary>
        /// <param name="executionContext">The execution context.</param>
        /// <returns>The execution status; always <c>ActivityExecutionStatus.Closed</c>.</returns>
        protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
        {
            GenBankFormatter formatter = new GenBankFormatter();

            formatter.Open(OutputFile);
            try
            {
                // The list is written first and the single sequence last, matching the
                // order used when both inputs are supplied.
                if (SequenceList != null)
                {
                    foreach (ISequence sequence in SequenceList)
                    {
                        formatter.Write(sequence);
                    }
                }

                if (Sequence != null)
                {
                    formatter.Write(Sequence);
                }
            }
            finally
            {
                // Close the formatter even when a write fails so the output file is not
                // left open.
                formatter.Close();
            }

            return ActivityExecutionStatus.Closed;
        }
Beispiel #5
0
        /// <summary>
        /// Read-write round trip: parses the first sequence of the GenBank file with
        /// multiple DB lines, formats it to a temporary file and asserts that
        /// <c>Utility.CompareFiles</c> reports the temporary file and the source file as the same.
        /// </summary>
        public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
        {
            string tempFileName = Path.GetTempFileName();
            bool same;

            try
            {
                ISequenceParser parser1 = new GenBankParser();
                using (parser1.Open(_genBankFile_WithMultipleDBLines))
                {
                    var orgSeq = parser1.Parse().First();
                    ISequenceFormatter formatter = new GenBankFormatter();
                    using (formatter.Open(tempFileName))
                    {
                        formatter.Format(orgSeq);

                        // Explicit Close kept from the original so the output is closed
                        // before the comparison below.
                        // NOTE(review): the enclosing using presumably closes it as well - confirm.
                        formatter.Close();
                    }
                }

                same = Utility.CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);
            }
            finally
            {
                // Remove the temp file even when parsing, formatting or comparing throws.
                File.Delete(tempFileName);
            }

            Assert.IsTrue(same);
            ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
        }