Exemplo n.º 1
0
        public void ValidateSeqFormatterProperties()
        {
            // Gets the expected sequence from the Xml
            string fastaFormatterName = this.utilityObj.xmlUtil.GetTextValue(Constants.FastAFileParserNode,
                                                                             Constants.ParserNameNode);
            string genBankFormatterName = this.utilityObj.xmlUtil.GetTextValue(Constants.GenBankFileParserNode,
                                                                               Constants.ParserNameNode);
            string gffFormatterName = this.utilityObj.xmlUtil.GetTextValue(Constants.GffFileParserNode,
                                                                           Constants.ParserNameNode);
            string fastQFormatterName = this.utilityObj.xmlUtil.GetTextValue(Constants.FastQFileParserNode,
                                                                             Constants.ParserNameNode);

            // Get SequenceFormatter class properties.
            FastAFormatter actualFastAFormatter = SequenceFormatters.Fasta;
            IReadOnlyList <ISequenceFormatter> allFormatters = SequenceFormatters.All;
            GenBankFormatter actualgenBankFormatterName      = SequenceFormatters.GenBank;
            FastQFormatter   actualFastQFormatterName        = SequenceFormatters.FastQ;
            GffFormatter     actualGffFormatterName          = SequenceFormatters.Gff;

            // Validate Sequence Formatter
            Assert.AreEqual(fastaFormatterName, actualFastAFormatter.Name);
            Assert.AreEqual(genBankFormatterName, actualgenBankFormatterName.Name);
            Assert.AreEqual(gffFormatterName, actualGffFormatterName.Name);
            Assert.AreEqual(fastQFormatterName, actualFastQFormatterName.Name);
            Assert.IsNotNull(allFormatters);
            ApplicationLog.WriteLine("Type of the parser is validated successfully");
        }
        /// <summary>
        /// Returns parser which supports the specified file.
        /// </summary>
        /// <param name="fileName">File name for which the parser is required.</param>
        /// <param name="formatterName">Name of the formatter to use.</param>
        /// <returns>If found returns the formatter as ISequenceFormatter else returns null.</returns>
        public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
        {
            ISequenceFormatter formatter = null;

            if (!string.IsNullOrEmpty(fileName))
            {
                if (formatterName == Properties.Resource.FastAName)
                {
                    formatter = new FastAFormatter(fileName);
                }
                else if (formatterName == Properties.Resource.FastQName)
                {
                    formatter = new FastQFormatter(fileName);
                }
                else if (formatterName == Properties.Resource.GENBANK_NAME)
                {
                    formatter = new GenBankFormatter(fileName);
                }
                else
                {
                    // Do a search through the known formatters to pick up custom formatters added through add-in.
                    formatter = All.FirstOrDefault(p => p.Name == formatterName);
                    // If we found a match based on extension, then open the file - this
                    // matches the above behavior where a specific formatter was created for
                    // the passed filename - the formatter is opened automatically in the constructor.
                    if (formatter != null)
                    {
                        formatter.Open(fileName);
                    }
                }
            }

            return(formatter);
        }
Exemplo n.º 3
0
        public void TestGenBankForManyFiles()
        {
            // parser and formatter will be used for all files in input dir

            // iterate through the files in input dir, parsing and formatting each; write results
            // to log file
            DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);

            foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);

                IEnumerable <ISequence> seqList = new GenBankParser().Parse(fileInfo.FullName);

                ISequenceFormatter formatter = new GenBankFormatter();
                using (formatter.Open(TempGenBankFileName))
                {
                    (formatter as GenBankFormatter).Format(seqList.ToList());
                }

                using (var reader = new StreamReader(TempGenBankFileName))
                {
                    string actual = reader.ReadToEnd();
                }

                File.Delete(TempGenBankFileName);
            }
        }
Exemplo n.º 4
0
        public void TestGenBankForManyFiles()
        {
            // parser and formatter will be used for all files in input dir
            ISequenceParser    parser    = new GenBankParser();
            ISequenceFormatter formatter = new GenBankFormatter();

            // iterate through the files in input dir, parsing and formatting each; write results
            // to log file
            DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);

            foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);

                try
                {
                    foreach (Sequence sequence in parser.Parse(fileInfo.FullName))
                    {
                        // don't do anything with it; just make sure it doesn't crash
                        formatter.FormatString(sequence);
                    }

                    ApplicationLog.WriteLine("Parse completed successfully." + Environment.NewLine);
                }
                catch (Exception e)
                {
                    ApplicationLog.WriteLine("Parse failed:" + Environment.NewLine + e + Environment.NewLine);
                    throw;
                }
            }
        }
Exemplo n.º 5
0
        public void TestGenBankWhenUserSetsDnaAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser();

            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.Parse(_singleDnaSeqGenBankFilename).FirstOrDefault();

            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter();

            formatter.Format(seq, TempGenBankFileName);

            string actual = string.Empty;

            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }
            File.Delete(TempGenBankFileName);

            // test the formatting
            Assert.AreEqual(Utility.CleanupWhiteSpace(_singleDnaSeqGenBankFileExpectedOutput),
                            Utility.CleanupWhiteSpace(actual));
        }
Exemplo n.º 6
0
        public void GenBankFormatterValidateWriteUsingStream()
        {
            InitializeXmlVariables();

            // Create a Sequence with all attributes.
            // Parse and update the properties instead of parsing entire file.
            using (ISequenceParser parser1 = new GenBankParser(FilePath))
            {
                IEnumerable <ISequence> seqList1 = parser1.Parse();
                string          tempFileName     = System.IO.Path.GetTempFileName();
                GenBankMetadata metadata         = null;
                ISequence       seq = null;
                string          expectedUpdatedSequence =
                    ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                Sequence orgSeq =
                    new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
                orgSeq.Metadata.Add("GenBank",
                                    (GenBankMetadata)seqList1.ElementAt(0).Metadata["GenBank"]);
                orgSeq.ID = seqList1.ElementAt(0).ID;

                using (ISequenceFormatter formatter = new GenBankFormatter())
                {
                    using (StreamWriter writer = new StreamWriter(tempFileName))
                    {
                        formatter.Open(writer);
                        formatter.Write(orgSeq);
                    }
                }
                using (GenBankParser parserObj = new GenBankParser(tempFileName))
                {
                    IEnumerable <ISequence> seqList = parserObj.Parse();
                    seq = seqList.ElementAt(0);
                    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                    Assert.AreEqual(SeqId, seq.ID);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                    // test the metadata that is tricky to parse, and will not be tested implicitly by
                    // testing the formatting
                    metadata = (GenBankMetadata)seq.Metadata["GenBank"];
                    if (metadata.Locus.Strand != SequenceStrandType.None)
                    {
                        Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                    }
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture), metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
                ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
                Console.WriteLine(string.Format((IFormatProvider)null, "GenBank Formatter BVT: Successfully validated the Sequence '{0}'", ExpectedSequence));
                File.Delete(tempFileName);
            }
        }
Exemplo n.º 7
0
        public void TestGenBankWhenUserSetsDnaAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser(_singleDnaSeqGenBankFilename);

            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.Parse().FirstOrDefault();

            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter(TempGenBankFileName);
            string             actual    = string.Empty;

            (formatter as GenBankFormatter).Write(seq);
            formatter.Close();

            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }
            File.Delete(TempGenBankFileName);

            // test the formatting
            Assert.AreEqual(_singleDnaSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
        }
Exemplo n.º 8
0
        /// <summary>
        /// Returns formatter which supports the specified file.
        /// </summary>
        /// <param name="fileName">File name for which the formatter is required.</param>
        /// <returns>If found returns the formatter as ISequenceFormatter else returns null.</returns>
        public static ISequenceFormatter FindFormatterByFile(string fileName)
        {
            ISequenceFormatter formatter = null;

            if (!string.IsNullOrEmpty(fileName))
            {
                if (Helper.IsGenBank(fileName))
                {
                    formatter = new GenBankFormatter();
                }
                else if (fileName.EndsWith(Resource.GFF_FILEEXTENSION, StringComparison.InvariantCultureIgnoreCase))
                {
                    formatter = new GffFormatter();
                }
                else if (Helper.IsFasta(fileName))
                {
                    formatter = new FastaFormatter();
                }
                else if (Helper.IsFastQ(fileName))
                {
                    formatter = new FastQFormatter();
                }
                else
                {
                    formatter = null;
                }
            }

            return(formatter);
        }
Exemplo n.º 9
0
        public void ValidateSeqFormatterProperties()
        {
            // Gets the expected sequence from the Xml
            string fastaFormatterName = _utilityObj._xmlUtil.GetTextValue(Constants.FastAFileParserNode,
                                                                          Constants.ParserNameNode);
            string genBankFormatterName = _utilityObj._xmlUtil.GetTextValue(Constants.GenBankFileParserNode,
                                                                            Constants.ParserNameNode);
            string gffFormatterName = _utilityObj._xmlUtil.GetTextValue(Constants.GffFileParserNode,
                                                                        Constants.ParserNameNode);
            string fastQFormatterName = _utilityObj._xmlUtil.GetTextValue(Constants.FastQFileParserNode,
                                                                          Constants.ParserNameNode);

            // Get SequenceFormatter class properties.
            FastaFormatter             actualFastAFormatter       = SequenceFormatters.Fasta;
            IList <ISequenceFormatter> allFormatters              = SequenceFormatters.All;
            GenBankFormatter           actualgenBankFormatterName = SequenceFormatters.GenBank;
            FastQFormatter             actualFastQFormatterName   = SequenceFormatters.FastQ;
            GffFormatter actualGffFormatterName = SequenceFormatters.Gff;

            // Validate Sequence Formatter
            Assert.AreEqual(fastaFormatterName, actualFastAFormatter.Name);
            Assert.AreEqual(4, allFormatters.Count);
            Assert.AreEqual(genBankFormatterName, actualgenBankFormatterName.Name);
            Assert.AreEqual(gffFormatterName, actualGffFormatterName.Name);
            Assert.AreEqual(fastQFormatterName, actualFastQFormatterName.Name);
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "SequenceFormatter : Type of the parser is validated successfully"));
            ApplicationLog.WriteLine("Type of the parser is validated successfully");
        }
Exemplo n.º 10
0
        /// <summary>
        /// The execution method for the activity.
        /// </summary>
        /// <param name="executionContext">The execution context.</param>
        /// <returns>The execution status.</returns>
        protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
        {
            GenBankFormatter formatter = new GenBankFormatter();

            formatter.Open(OutputFile);
            if ((Sequence == null) && (SequenceList != null))
            {
                foreach (ISequence sequence in SequenceList)
                {
                    formatter.Write(sequence);
                }
            }
            else if ((Sequence != null) && (SequenceList == null))
            {
                formatter.Write(Sequence);
            }
            else if ((Sequence != null) && (SequenceList != null))
            {
                foreach (ISequence sequence in SequenceList)
                {
                    formatter.Write(sequence);
                }

                formatter.Write(Sequence);
            }

            formatter.Close();
            return(ActivityExecutionStatus.Closed);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Returns parser which supports the specified file.
        /// </summary>
        /// <param name="fileName">File name for which the parser is required.</param>
        /// <param name="formatterName">Name of the formatter to use.</param>
        /// <returns>If found returns the formatter as ISequenceFormatter else returns null.</returns>
        public static ISequenceFormatter FindFormatterByName(string fileName, string formatterName)
        {
            ISequenceFormatter formatter = null;

            if (!string.IsNullOrEmpty(fileName))
            {
                if (formatterName == Properties.Resource.FastAName)
                {
                    formatter = new FastAFormatter(fileName);
                }
                else if (formatterName == Properties.Resource.FastQName)
                {
                    formatter = new FastQFormatter(fileName);
                }
                else if (formatterName == Properties.Resource.GENBANK_NAME)
                {
                    formatter = new GenBankFormatter(fileName);
                }
                else if (formatterName == Properties.Resource.GFF_NAME)
                {
                    formatter = new GffFormatter(fileName);
                }
                else
                {
                    formatter = null;
                }
            }

            return(formatter);
        }
Exemplo n.º 12
0
        public void TestGenBankWhenUserSetsProteinAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser();

            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter();

            using (formatter.Open(TempGenBankFileName))
                formatter.Format(seq);

            string actual = string.Empty;

            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }

            File.Delete(TempGenBankFileName);
            // test the formatting
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine), actual.Replace(" ", ""));
        }
Exemplo n.º 13
0
        public void GenBankFormatterWithParseValidateWriteFilePath()
        {
            InitializeXmlVariables();
            // parse
            using (ISequenceParser parserObj = new GenBankParser(FilePath))
            {
                IEnumerable <ISequence> seqList = parserObj.Parse();
                ISequence seq          = seqList.ElementAt(0);
                string    tempFileName = System.IO.Path.GetTempFileName();
                using (ISequenceFormatter formatter = new GenBankFormatter(tempFileName))
                {
                    formatter.Write(seq);
                    formatter.Close();

                    // parse
                    ISequenceParser parserObjFromFile = new GenBankParser(tempFileName);
                    seqList = parserObjFromFile.Parse();
                    seq     = seqList.ElementAt(0);
                    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                    Assert.AreEqual(SeqId, seq.ID);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                    // test the metadata that is tricky to parse, and will not be tested implicitly by
                    // testing the formatting
                    GenBankMetadata metadata =
                        (GenBankMetadata)seq.Metadata["GenBank"];
                    if (metadata.Locus.Strand != SequenceStrandType.None)
                    {
                        Assert.AreEqual(StrandType,
                                        metadata.Locus.Strand.ToString());
                    }
                    Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                    metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                    Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                    Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                    metadata.Locus.Date);
                    Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                    // test the sequence string
                    Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));

                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the Sequence");
                    Console.WriteLine(string.Format((IFormatProvider)null,
                                                    "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
                                                    ExpectedSequence));
                    parserObjFromFile.Close();
                    parserObjFromFile.Dispose();
                    File.Delete(tempFileName);
                }
            }
        }
Exemplo n.º 14
0
        public void TestGenBankWhenParsingOne()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence       seq    = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

            // test the non-metadata properties
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
            Assert.AreEqual("SCU49845", seq.ID);

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            Assert.AreEqual(metadata.Locus.Strand, SequenceStrandType.None);
            Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLower(CultureInfo.CurrentCulture));
            Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse("21-JUN-1999", (IFormatProvider)null), metadata.Locus.Date);
            Assert.AreEqual("1", metadata.Version.Version);
            Assert.AreEqual("1293613", metadata.Version.GiNumber);

            // test that we're correctly putting all types of metadata in the right places
            Assert.AreEqual(1, seq.Metadata.Count);
            IList <CitationReference> referenceList = metadata.References;

            Assert.AreEqual(3, referenceList.Count);
            IList <FeatureItem> featureList = metadata.Features.All;

            Assert.AreEqual(6, featureList.Count);
            Assert.AreEqual(4, featureList[0].Qualifiers.Count);
            Assert.AreEqual(5, featureList[1].Qualifiers.Count);
            Assert.AreEqual(1, featureList[2].Qualifiers.Count);

            // test the sequence string
            string expected = @"gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc";

            Assert.AreEqual(expected, new string(seq.Select(a => (char)a).ToArray()));

            // format
            ISequenceFormatter formatter = new GenBankFormatter();

            formatter.Format(seq, TempGenBankFileName);

            string actual = string.Empty;

            using (StreamReader reader = new StreamReader(TempGenBankFileName))
            {
                actual = reader.ReadToEnd();
            }
            File.Delete(TempGenBankFileName);

            // test the formatting
            Assert.AreEqual(Utility.CleanupWhiteSpace(_singleProteinSeqGenBankFileExpectedOutput),
                            Utility.CleanupWhiteSpace(actual));
        }
Exemplo n.º 15
0
        public void GenBankFormatterValidateWriteWithFilePath()
        {
            InitializeXmlVariables();
            ISequenceParser parserObj = new GenBankParser();
            {
                IEnumerable <ISequence> seqList1 = parserObj.Parse(FilePath);
                string tempFileName            = Path.GetTempFileName();
                string expectedUpdatedSequence =
                    ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence)
                {
                    ID = seqList1.ElementAt(0).ID
                };
                orgSeq.Metadata.Add("GenBank", seqList1.ElementAt(0).Metadata["GenBank"]);
                ISequenceFormatter formatter = new GenBankFormatter();
                {
                    formatter.Format(orgSeq, tempFileName);

                    // parse
                    ISequenceParser         parserObjFromFile = new GenBankParser();
                    IEnumerable <ISequence> seqList           = parserObjFromFile.Parse(tempFileName);
                    ISequence seq = seqList.ElementAt(0);
                    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                    Assert.AreEqual(SeqId, seq.ID);
                    ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                    // test the metadata that is tricky to parse, and will not be tested implicitly by
                    // testing the formatting
                    var metadata =
                        (GenBankMetadata)orgSeq.Metadata["GenBank"];
                    if (metadata.Locus.Strand != SequenceStrandType.None)
                    {
                        Assert.AreEqual(StrandType,
                                        metadata.Locus.Strand.ToString());
                    }
                    Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                    metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                    Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                    Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                    metadata.Locus.Date);
                    Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                    // test the sequence string
                    Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
                    ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
                    File.Delete(tempFileName);
                }
            }
        }
Exemplo n.º 16
0
        public void GenBankProperties()
        {
            ISequenceParser parser = new GenBankParser();

            Assert.AreEqual(parser.Name, Resource.GENBANK_NAME);
            Assert.AreEqual(parser.Description, Resource.GENBANKPARSER_DESCRIPTION);
            Assert.AreEqual(parser.SupportedFileTypes, Resource.GENBANK_FILEEXTENSION);

            ISequenceFormatter formatter = new GenBankFormatter();

            Assert.AreEqual(formatter.Name, Resource.GENBANK_NAME);
            Assert.AreEqual(formatter.Description, Resource.GENBANKFORMATTER_DESCRIPTION);
            Assert.AreEqual(formatter.SupportedFileTypes, Resource.GENBANK_FILEEXTENSION);
        }
Exemplo n.º 17
0
        public void TestGenBankWhenParsingOne()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence       seq    = parser.ParseOne(_singleProteinSeqGenBankFilename);

            // test the non-metadata properties
            Assert.IsTrue(seq.IsReadOnly);
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
            Assert.AreEqual(MoleculeType.DNA, seq.MoleculeType);
            Assert.AreEqual("SCU49845", seq.DisplayID);
            Assert.AreEqual("SCU49845", seq.ID);

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            Assert.AreEqual(metadata.Locus.Strand, SequenceStrandType.None);
            Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLower());
            Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse("21-JUN-1999"), metadata.Locus.Date);
            Assert.AreEqual("1", metadata.Version.Version);
            Assert.AreEqual("1293613", metadata.Version.GINumber);

            // test that we're correctly putting all types of metadata in the right places
            Assert.AreEqual(1, seq.Metadata.Count);
            IList <CitationReference> referenceList = metadata.References;

            Assert.AreEqual(3, referenceList.Count);
            IList <FeatureItem> featureList = metadata.Features.All;

            Assert.AreEqual(6, featureList.Count);
            Assert.AreEqual(4, featureList[0].Qualifiers.Count);
            Assert.AreEqual(5, featureList[1].Qualifiers.Count);
            Assert.AreEqual(1, featureList[2].Qualifiers.Count);

            // test the sequence string
            string expected = @"GATCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCCGACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCTGCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGAACCGCCAATAGACAACATATGTAACATATTTAGGATATACCTCGAAAATAATAAACCGCCACACTGTCATTATTATAATTAGAAACAGAACGCAAAAATTATCCACTATATAATTCAAAGACGCGAAAAAAAAAGAACAACGCGTCATAGAACTTTTGGCAATTCGCGTCACAAATAAATTTTGGCAACTTATGTTTCCTCTTCGAGCAGTACTCGAGCCCTGTCTCAAGAATGTAATAATACCCATCGTAGGTATGGTTAAAGATAGCATCTCCACAACCTCAAAGCTCCTTGCCGAGAGTCGCCCTCCTTTGTCGAGTAATTTTCACTTTTCATATGAGAACTTATTTTCTTATTCTTTACTCTCACATCCTGTAGTGATTGACACTGCAACAGCCACCATCACTAGAAGAACAGAACAATTACTTAATAGAAAAATTATATCTTCCTCGAAACGATTTCCTGCTTCCAACATCTACGTATATCAAGAAGCATTCACTTACCATGACACAGCTTCAGATTTCATTATTGCTGACAGCTACTATATCACTACTCCATCTAGTAGTGGCCACGCCCTATGAGGCATATCCTATCGGAAAACAATACCCCCCAGTGGCAAGAGTCAATGAATCGTTTACATTTCAAATTTCCAATGATACCTATAAATCGTCTGTAGACAAGACAGCTCAAATAACATACAATTGCTTCGACTTACCGAGCTGGCTTTCGTTTGACTCTAGTTCTAGAACGTTCTCAGGTGAACCTTCTTCTGACTTACTATCTGATGCGAACACCACGTTGTATTTCAATGTAATACTCGAGGGTACGGACTCTGCCGACAGCACGTCTTTGAACAATACATACCAATTTGTTGTTACAAACCGTCCATCCATCTCGCTATCGTCAGATTTCAATCTATTGGCGTTGTTAAAAAACTATGGTTATACTAACGGCAAAAACGCTCTGAAACTAGATCCTAATGAAGTCTTCAACGTGACTTTTGACCGTTCAATGTTCACTAACGAAGAATCCATTGTGTCGTATTACGGACGTTCTCAGTTGTATAATGCGCCGTTACCCAATTGGCTGTTCTTCGATTCTGGCGAGTTGAAGTTTACTGGGACGGCACCGGTGATAAACTCGGCGATTGCTCCAGAAACAAGCTACAGTTTTGTCATCATCGCTACAGACATTGAAGGATTTTCTGCCGTTGAGGTAGAATTCGAATTAGTCATCGGGGCTCACCAGTTAACTACCTCTATTCAAAATAGTTTGATAATCAACGTTACTGACACAGGTAACGTTTCATATGACTTACCTCTAAACTATGTTTATCTCGATGACGATCCTATTTCTTCTGATAAATTGGGTTCTATAAACTTATTGGATGCTCCAGACTGGGTGGCATTAGATAATGCTACCATTTCCGGGTCTGTCCCAGATGAATTACTCGGTAAGAACTCCAATCCTGCCAATTTTTCTGTGTCCATTTATGATACTTATGGTGATGTGATTTATTTCAACTTCGAAGTTGTCTCCACAACGGATTTGTTTGCCATTAGTTCTCTTCCCAATATTAACGCTACAAGGGGTGAATGGTTCTCCTACTATTTTTTGCCTTCTCAGTTTACAGACTACGTGAATACAAACGTTTCATTAGAGTTTACTAATTCAAGCCAAGACCATGACTGGGTGAAATTCCAATCATCTAATTTAACATTAGCTGGAGAAGTGCCCAAGAATTTCGACAAGCTTTCATTAGGTTTGAAAGCGAACCAAGGTTCACAATCTCAAGAGCTATATTTTAACATCATTGGCATGGATTCAAAGATAACTCACTCAAACCACAGTGCGAATGCAACGTCCACAAGAAGTTCTCACCACTCCACCTCAACAAGTTCTTACACATCTTCTACTTACACTGCAAAAATTTCTTCTACCTCCGCTGCTGCTACTTCTTCTGCTCCAGCAGCGCTGCCAGCAGCCAATAAAACTTCATCTCACAATAAAAAAGCAGTAGCAATTGCGTGCGGTGTTGCTATCCCATTAGGCGTTATCCTAGTAGCTCTCATTTGCTTCCTAATATTCTGGAGACGCAGAAGGGAAAATCCAGACGATGAAAACTTACCGCATGCTATTAGTGGACCTGATTTGAATAATCCTGCAAATAAACCAAATCAAGAAAACGCTACACCTTTGAACAACCCCTTTGATGATGATGCTTCCTCGTACGATGATACTTCAATAGCAAGAAGATTGGCTGCTTTGAACACTTTGAAATTGGATAACCACTCTGCCACTGAATCTGATATTTCCAGCGTGGATGAAAAGAGAGATTCTCTATCAGGTATGAATACATACAATGATCAGTTCCAATCCCAAAGTAAAGAAGAATTATTAGCAAAACCCCCAGTACAGCCTCCAGAGAGCCCGTTCTTTGACCCACAGAATAGGTCTTCTTCTGTGTATATGGATAGTGAACCAGCAGTAAATAAATCCTGGCGATATACTGGCAACCTGTCACCAGTCTCTGATATTGTCAGAGACAGTTACGGATCACAAAAAACTGTTGATACAGAAAAACTTTTCGATTTAGAAGCACCAGAGAAGGAAAAACGTACGTCAAGGGATGTCACTATGTCTTCACTGGACCCTTGGAACAGCAATATTAGCCCTTCTCCCGTAAGAAAATCAGTAACACCATCACCATATAACGTAACGAAGCATCGTAACCGCCACTTACAAAATATTCAAGACTCTCAAAGCGGTAAAAACGGAATCACTCCCACAACAATGTCAACTTCATCTTCTGACGATTTTGTTCCGGTTAAAGATGGTGAAAATTTTTGCTGGGTCCATAGCATGGAACCAGACAGAAGACCAAGTAAGAAAAGGTTAGTAGATTTTTCAAATAAGAGTAATGTCAATGTTGGTCAAGTTAAGGACATTCACGGACGCATCCCAGAAATGCTGTGATTATACGCAACGATATTTTGCTTAATTTTATTTTCCTGTTTTATTTTTTATTAGTGGTTTACAGATACCCTATATTTTATTTAGTTTTTATACTTAGAGACATTTAATTTTAATTCCATTCTTCAAATTTCATTTTTGCACTTAAAACAAAGATCCAAAAATGCTCTCGCCCTCTTCATATTGAGAATACACTCCATTCAAAATTTTGTCGTCACCGCTGATTAATTTTTCACTAAACTGATGAATAATCAAAGGCCCCACGTCAGAACCGACTAAAGAAGTGAGTTTTATTTTAGGAGGTTGAAAACCATTATTGTCTGGTAAATTTTCATCTTCTTGACATTTAACCCAGTTTGAATCCCTTTCAATTTCTGCTTTTTCCTCCAAACTATCGACCCTCCTGTTTCTGTCCAACTTATGTCCTAGTTCCAATTCGATCGCATTAATAACTGCTTCAAATGTTATTGTGTCATCGTTGACTTTAGGTAATTTCTCCAAATGCATAATCAAACTATTTAAGGAAGATCGGAATTCGTCGAACACTTCAGTTTCCGTAATGATCTGATCGTCTTTATCCACATGTTGTAATTCACTAAAATCTAAAACGTATTTTTCAATGCATAAATCGTTCTTTTTATTAATAATGCAGATGGAAAATCTGTAAACGTGCGTTAATTTAGAAAGAACATCCAGTATAAGTTCTTCTATATAGTCAATTAAAGCAGGATGCCTATTAATGGGAACGAACTGCGGCAAGTTGAATGACTGGTAAGTAGTGTAGTCGAATGACTGAGGTGGGTATACATTTCTATAAAATAAAATCAAATTAATGTAGCATTTTAAGTATACCCTCAGCCACTTCTCTACCCATCTATTCATAAAGCTGACGCAACGATTACTATTTTTTTTTTCTTCTTGGATCTCAGTCGTCGCAAAAACGTATACCTTCTTTTTCCGACCTTTTTTTTAGCTTTCTGGAAAAGTTTATATTAGTTAAACAGGGTCTAGTCTTAGTGTGAAAGCTAGTGGTTTCGATTGACTGATATTAAGAAAGTGGAAATTAAATTAGTAGTGTAGACGTATATGCATATGTATTTCTCGCCTGTTTATGTTTCTACGTACTTTTGATTTATAGCAAGGGGAAAAGAAATACATACTATTTTTTGGTAAAGGTGAAAGCATAATGTAAAAGCTAGAATAAAATGGACGAAATAAAGAGAGGCTTAGTTCATCTTTTTTCCAAAAAGCACCCAATGATAATAACTAAAATGAAAAGGATTTGCCATCTGTCAGCAACATCAGTTGTGTGAGCAATAATAAAATCATCACCTCCGTTGCCTTTAGCGCGTTTGTCGTTTGTATCTTCCGTAATTTTAGTCTTATCAATGGGAATCATAAATTTTCCAATGAATTAGCAATTTCGTCCAATTCTTTTTGAGCTTCTTCATATTTGCTTTGGAATTCTTCGCACTTCTTTTCCCATTCATCTCTTTCTTCTTCCAAAGCAACGATCCTTCTACCCATTTGCTCAGAGTTCAAATCGGCCTCTTTCAGTTTATCCATTGCTTCCTTCAGTTTGGCTTCACTGTCTTCTAGCTGTTGTTCTAGATCCTGGTTTTTCTTGGTGTAGTTCTCATTATTAGATCTCAAGTTATTGGAGTCTTCAGCCAATTGCTTTGTATCAGACAATTGACTCTCTAACTTCTCCACTTCACTGTCGAGTTGCTCGTTTTTAGCGGACAAAGATTTAATCTCGTTTTCTTTTTCAGTGTTAGATTGCTCTAATTCTTTGAGCTGTTCTCTCAGCTCCTCATATTTTTCTTGCCATGACTCAGATTCTAATTTTAAGCTATTCAATTTCTCTTTGATC";

            Assert.AreEqual(expected, seq.ToString());

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string             actual    = formatter.FormatString(seq);

            // test the formatting
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
        }
Exemplo n.º 18
0
        public void TestGenBankWhenUserSetsDnaAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser();

            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.ParseOne(_singleDnaSeqGenBankFilename);

            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string             actual    = formatter.FormatString(seq);

            // test the formatting
            Assert.AreEqual(_singleDnaSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
        }
Exemplo n.º 19
0
        /// <summary>
        /// Returns formatter which supports the specified file.
        /// </summary>
        /// <param name="fileName">File name for which the formatter is required.</param>
        /// <returns>If found returns the formatter as ISequenceFormatter else returns null.</returns>
        public static ISequenceFormatter FindFormatterByFileName(string fileName)
        {
            ISequenceFormatter formatter = null;

            if (!string.IsNullOrEmpty(fileName))
            {
                if (IsFasta(fileName))
                {
                    formatter = new FastAFormatter(fileName);
                }
                else if (IsFastQ(fileName))
                {
                    formatter = new FastQFormatter(fileName);
                }
                else if (IsGenBank(fileName))
                {
                    formatter = new GenBankFormatter(fileName);
                }
                else if (fileName.EndsWith(Properties.Resource.GFF_FILEEXTENSION, StringComparison.InvariantCultureIgnoreCase))
                {
                    formatter = new GffFormatter(fileName);
                }
                else
                {
                    // Do a search through the known formatters to pick up custom formatters added through add-in.
                    string fileExtension = Path.GetExtension(fileName);
                    if (!string.IsNullOrEmpty(fileExtension))
                    {
                        formatter = All.FirstOrDefault(p => p.SupportedFileTypes.Contains(fileExtension));
                        // If we found a match based on extension, then open the file - this
                        // matches the above behavior where a specific formatter was created for
                        // the passed filename - the formatter is opened automatically in the constructor.
                        if (formatter != null)
                        {
                            formatter.Open(fileName);
                        }
                    }
                }
            }

            return(formatter);
        }
Exemplo n.º 20
0
        protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
        {
            GenBankFormatter formatter = new GenBankFormatter();

            if ((Sequence == null) && (SequenceList != null))
            {
                formatter.Format(SequenceList, OutputFile);
            }
            else if ((Sequence != null) && (SequenceList == null))
            {
                formatter.Format(Sequence, OutputFile);
            }
            else if ((Sequence != null) && (SequenceList != null))
            {
                SequenceList.Add(Sequence);
                formatter.Format(SequenceList, OutputFile);
                SequenceList.Remove(Sequence);
            }
            return(ActivityExecutionStatus.Closed);
        }
Exemplo n.º 21
0
        public void TestGenBankForManyFiles()
        {
            // parser and formatter will be used for all files in input dir

            // iterate through the files in input dir, parsing and formatting each; write results
            // to log file
            DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);

            foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);
                ISequenceParser parser = new GenBankParser(fileInfo.FullName);


                try
                {
                    IEnumerable <ISequence> seqList = parser.Parse();
                    // don't do anything with it; just make sure it doesn't crash
                    ISequenceFormatter formatter = new GenBankFormatter(TempGenBankFileName);
                    string             actual    = string.Empty;

                    (formatter as GenBankFormatter).Write(seqList.ToList());

                    using (StreamReader reader = new StreamReader(TempGenBankFileName))
                    {
                        actual = reader.ReadToEnd();
                    }
                    File.Delete(TempGenBankFileName);
                    parser.Close();
                    parser.Dispose();

                    ApplicationLog.WriteLine("Parse completed successfully." + Environment.NewLine);
                }
                catch (Exception e)
                {
                    ApplicationLog.WriteLine("Parse failed:" + Environment.NewLine + e + Environment.NewLine);
                    throw;
                }
            }
        }
Exemplo n.º 22
0
        public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
        {
            // Create a Sequence with all attributes.
            // parse and update the properties instead of parsing entire file.
            string          tempFileName = Path.GetTempFileName();
            ISequenceParser parser1      = new GenBankParser();

            using (parser1.Open(_genBankFile_WithMultipleDBLines))
            {
                var orgSeq = parser1.Parse().First();
                ISequenceFormatter formatter = new GenBankFormatter();
                using (formatter.Open(tempFileName))
                {
                    formatter.Format(orgSeq);
                    formatter.Close();
                }
            }
            var same = Utility.CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);

            File.Delete(tempFileName);
            Assert.IsTrue(same);
            ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
        }
Exemplo n.º 23
0
        public void GenBankFormatterWithParseValidateFormatFilePath()
        {
            InitializeXmlVariables();
            // parse
            ISequenceParser   parserObj = new GenBankParser();
            IList <ISequence> seqList   = parserObj.Parse(FilePath);

            ISequence seq = seqList[0];

            ISequenceFormatter formatter = new GenBankFormatter();

            formatter.Format(seq, Constants.GenBankTempFileName);

            // parse
            parserObj = new GenBankParser();
            seqList   = parserObj.Parse(Constants.GenBankTempFileName);

            seq = seqList[0];

            // test the non-metadata properties
            if (0 == string.Compare(IsSequenceReadOnly, "true",
                                    false, CultureInfo.CurrentCulture))
            {
                Assert.IsTrue(seq.IsReadOnly);
                ApplicationLog.WriteLine("Successfully validated the ReadOnly Property");
            }

            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
            Assert.AreEqual(Utility.GetMoleculeType(MolType), seq.MoleculeType);
            Assert.AreEqual(SeqId, seq.DisplayID);
            Assert.AreEqual(SeqId, seq.ID);
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata =
                (GenBankMetadata)seq.Metadata["GenBank"];

            if (metadata.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType,
                                metadata.Locus.Strand.ToString());
            }
            Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
            Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                            metadata.Locus.Date);
            Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // test the sequence string
            Assert.AreEqual(ExpectedSequence, seq.ToString());
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the Sequence");
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
                                            ExpectedSequence));

            File.Delete(Constants.GenBankTempFileName);
        }
Exemplo n.º 24
0
        public void GenBankFormatterValidateFormatTextWriter()
        {
            InitializeXmlVariables();
            // Create a Sequence with all attributes.
            // parse and update the properties instead of parsing entire file.
            ISequenceParser   parser1  = new GenBankParser();
            IList <ISequence> seqList1 = parser1.Parse(FilePath);

            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
            Sequence orgSeq =
                new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);

            orgSeq.Metadata.Add("GenBank",
                                (GenBankMetadata)seqList1[0].Metadata["GenBank"]);
            orgSeq.ID           = seqList1[0].ID;
            orgSeq.DisplayID    = seqList1[0].DisplayID;
            orgSeq.MoleculeType = seqList1[0].MoleculeType;

            ISequenceFormatter formatter = new GenBankFormatter();

            using (TextWriter writer =
                       new StreamWriter(Constants.GenBankTempFileName))
            {
                formatter.Format(orgSeq, writer);
            }

            // parse
            GenBankParser     parserObj = new GenBankParser();
            IList <ISequence> seqList   = parserObj.Parse(Constants.GenBankTempFileName);

            ISequence seq = seqList[0];

            // test the non-metadata properties
            if (0 == string.Compare(IsSequenceReadOnly, "true",
                                    false, CultureInfo.CurrentCulture))
            {
                Assert.IsTrue(seq.IsReadOnly);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the ReadOnly Property");
            }

            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
            Assert.AreEqual(Utility.GetMoleculeType(MolType), seq.MoleculeType);
            Assert.AreEqual(SeqId, seq.DisplayID);
            Assert.AreEqual(SeqId, seq.ID);
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            if (metadata.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
            }
            Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture), metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
            Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
            Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // test the sequence string
            Assert.AreEqual(ExpectedSequence, seq.ToString());
            ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
            Console.WriteLine(string.Format((IFormatProvider)null, "GenBank Formatter BVT: Successfully validated the Sequence '{0}'", ExpectedSequence));

            File.Delete(Constants.GenBankTempFileName);
        }