Пример #1
0
        public void GenBankFormatterValidateWriteWithFilePath()
        {
            InitializeXmlVariables();
            ISequenceParser parserObj = new GenBankParser();
            {
                IEnumerable <ISequence> seqList1 = parserObj.Parse(FilePath);
                string tempFileName            = Path.GetTempFileName();
                string expectedUpdatedSequence =
                    ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
                var orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence)
                {
                    ID = seqList1.ElementAt(0).ID
                };
                orgSeq.Metadata.Add("GenBank", seqList1.ElementAt(0).Metadata["GenBank"]);
                ISequenceFormatter formatter = new GenBankFormatter();
                {
                    formatter.Format(orgSeq, tempFileName);

                    // parse
                    ISequenceParser         parserObjFromFile = new GenBankParser();
                    IEnumerable <ISequence> seqList           = parserObjFromFile.Parse(tempFileName);
                    ISequence seq = seqList.ElementAt(0);
                    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                    Assert.AreEqual(SeqId, seq.ID);
                    ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                    // test the metadata that is tricky to parse, and will not be tested implicitly by
                    // testing the formatting
                    var metadata =
                        (GenBankMetadata)orgSeq.Metadata["GenBank"];
                    if (metadata.Locus.Strand != SequenceStrandType.None)
                    {
                        Assert.AreEqual(StrandType,
                                        metadata.Locus.Strand.ToString());
                    }
                    Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                    metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                    Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                    Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                    metadata.Locus.Date);
                    Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
                    Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                    // test the sequence string
                    Assert.AreEqual(ExpectedSequence, new string(seq.Select(a => (char)a).ToArray()));
                    ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
                    File.Delete(tempFileName);
                }
            }
        }
Пример #2
0
        public void TestGenBankFeaturesWithBinaryFormatter()
        {
            Stream stream = null;

            try
            {
                stream = File.Open("GenbankMetadata.data", FileMode.Create);
                BinaryFormatter formatter = new BinaryFormatter();
                ISequenceParser parser    = new GenBankParser();
                ISequence       seq       = parser.ParseOne(@"testdata\GenBank\NC_001284.gbk");
                GenBankMetadata metadata  = seq.Metadata["GenBank"] as GenBankMetadata;
                Assert.AreEqual(metadata.Features.All.Count, 743);
                Assert.AreEqual(metadata.Features.CodingSequences.Count, 117);
                Assert.AreEqual(metadata.Features.Exons.Count, 32);
                Assert.AreEqual(metadata.Features.Introns.Count, 22);
                Assert.AreEqual(metadata.Features.Genes.Count, 60);
                Assert.AreEqual(metadata.Features.MiscFeatures.Count, 455);
                Assert.AreEqual(metadata.Features.Promoters.Count, 17);
                Assert.AreEqual(metadata.Features.TransferRNAs.Count, 21);
                Assert.AreEqual(metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count, 117);
                Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), metadata.Features.CodingSequences[0].GetTranslation().ToString());
                Assert.AreEqual(metadata.GetFeatures(11918, 12241).Count, 2);
                formatter.Serialize(stream, metadata);
                stream.Seek(0, SeekOrigin.Begin);
                GenBankMetadata deserializedMetadata = (GenBankMetadata)formatter.Deserialize(stream);
                Assert.AreNotSame(metadata, deserializedMetadata);
                Assert.AreEqual(deserializedMetadata.Features.All.Count, 743);
                Assert.AreEqual(deserializedMetadata.Features.CodingSequences.Count, 117);
                Assert.AreEqual(deserializedMetadata.Features.Exons.Count, 32);
                Assert.AreEqual(deserializedMetadata.Features.Introns.Count, 22);
                Assert.AreEqual(deserializedMetadata.Features.Genes.Count, 60);
                Assert.AreEqual(deserializedMetadata.Features.MiscFeatures.Count, 455);
                Assert.AreEqual(deserializedMetadata.Features.Promoters.Count, 17);
                Assert.AreEqual(deserializedMetadata.Features.TransferRNAs.Count, 21);
                Assert.AreEqual(deserializedMetadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count, 117);
                Assert.AreEqual(deserializedMetadata.Features.CodingSequences[0].Translation.Trim('"'), metadata.Features.CodingSequences[0].GetTranslation().ToString());
                Assert.AreEqual(deserializedMetadata.GetFeatures(11918, 12241).Count, 2);
            }
            catch
            {
                Assert.Fail();
            }
            finally
            {
                if (stream != null)
                {
                    stream.Close();
                    stream = null;
                }
            }
        }
Пример #3
0
        public void GenBankParserValidateParseFileName()
        {
            // parse
            ISequenceParser   parserObj = new GenBankParser();
            IList <ISequence> seqList   = parserObj.Parse(FilePath);

            ISequence seq = seqList[0];

            // test the non-metadata properties
            if (0 == string.Compare(IsSequenceReadOnly, "true", false,
                                    CultureInfo.CurrentCulture))
            {
                Assert.IsTrue(seq.IsReadOnly);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the ReadOnly Property");
            }

            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
            Assert.AreEqual(Utility.GetMoleculeType(MolType), seq.MoleculeType);
            Assert.AreEqual(SeqId, seq.DisplayID);
            Assert.AreEqual(SeqId, seq.ID);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            if (metadata.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType,
                                metadata.Locus.Strand.ToString());
            }
            Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
            Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                            metadata.Locus.Date);
            Assert.AreEqual(Version, metadata.Version.Version.ToString());
            Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // test the sequence string
            Assert.AreEqual(ExpectedSequence, seq.ToString());
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Sequence");
            Console.WriteLine(string.Format(null,
                                            "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                            ExpectedSequence));
        }
Пример #4
0
        /// <summary>
        /// Finds a suitable parser that supports the specified file, opens the file and returns the parser.
        /// </summary>
        /// <param name="fileName">File name for which the parser is required.</param>
        /// <returns>If found returns the open parser as ISequenceParser else returns null.</returns>
        public static ISequenceParser FindParserByFileName(string fileName)
        {
            ISequenceParser parser = null;

            if (!string.IsNullOrEmpty(fileName))
            {
                if (Helper.IsZippedFasta(fileName))
                {
                    parser = new FastAZippedParser(fileName);
                }
                else if (Helper.IsZippedFastQ(fileName))
                {
                    parser = new FastQZippedParser(fileName);
                }
                else if (IsFasta(fileName))
                {
                    parser = new FastAParser(fileName);
                }
                else if (IsFastQ(fileName))
                {
                    parser = new FastQParser(fileName);
                }
                else if (Helper.IsBAM(fileName))
                {
                    throw new NotImplementedException();
                    //parser = new BAMSequenceParser(fileName);
                }
                else if (IsGenBank(fileName))
                {
                    parser = new GenBankParser(fileName);
                }
                else
                {
                    // Do a search through the known parsers to pick up custom parsers added through add-in.
                    string fileExtension = Path.GetExtension(fileName);
                    if (!string.IsNullOrEmpty(fileExtension))
                    {
                        parser = All.FirstOrDefault(p => p.SupportedFileTypes.Contains(fileExtension));
                        // If we found a match based on extension, then open the file - this
                        // matches the above behavior where a specific parser was created for
                        // the passed filename - the parser is opened automatically in the constructor.
                        if (parser != null)
                        {
                            parser.Open(fileName);
                        }
                    }
                }
            }

            return(parser);
        }
Пример #5
0
        public void GenBankProperties()
        {
            ISequenceParser parser = new GenBankParser();

            Assert.AreEqual(parser.Name, Resource.GENBANK_NAME);
            Assert.AreEqual(parser.Description, Resource.GENBANKPARSER_DESCRIPTION);
            Assert.AreEqual(parser.SupportedFileTypes, Resource.GENBANK_FILEEXTENSION);

            ISequenceFormatter formatter = new GenBankFormatter();

            Assert.AreEqual(formatter.Name, Resource.GENBANK_NAME);
            Assert.AreEqual(formatter.Description, Resource.GENBANKFORMATTER_DESCRIPTION);
            Assert.AreEqual(formatter.SupportedFileTypes, Resource.GENBANK_FILEEXTENSION);
        }
Пример #6
0
        public void GenBankParserValidateParseFileNameWithStream()
        {
            InitializeXmlVariables();
            List <ISequence>        seq     = null;
            IEnumerable <ISequence> seqList = null;

            // Parse the Stream.
            using (ISequenceParser parserObj = new GenBankParser())
            {
                using (StreamReader reader = new StreamReader(FilePath))
                {
                    seqList = parserObj.Parse(reader);
                    seq     = seqList.ToList();
                }

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq[0].Alphabet);
                Assert.AreEqual(SeqId, seq[0].ID);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                GenBankMetadata metadata = (GenBankMetadata)seq[0].Metadata["GenBank"];
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType,
                                    metadata.Locus.Strand.ToString());
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                metadata.Locus.Date);

                Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, new string(seq[0].Select(a => (char)a).ToArray()));

                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Sequence");
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                                ExpectedSequence));
            }
        }
Пример #7
0
        public void TestGenBankWhenParsingOne()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence       seq    = parser.ParseOne(_singleProteinSeqGenBankFilename);

            // test the non-metadata properties
            Assert.IsTrue(seq.IsReadOnly);
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
            Assert.AreEqual(MoleculeType.DNA, seq.MoleculeType);
            Assert.AreEqual("SCU49845", seq.DisplayID);
            Assert.AreEqual("SCU49845", seq.ID);

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            Assert.AreEqual(metadata.Locus.Strand, SequenceStrandType.None);
            Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLower());
            Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse("21-JUN-1999"), metadata.Locus.Date);
            Assert.AreEqual("1", metadata.Version.Version);
            Assert.AreEqual("1293613", metadata.Version.GINumber);

            // test that we're correctly putting all types of metadata in the right places
            Assert.AreEqual(1, seq.Metadata.Count);
            IList <CitationReference> referenceList = metadata.References;

            Assert.AreEqual(3, referenceList.Count);
            IList <FeatureItem> featureList = metadata.Features.All;

            Assert.AreEqual(6, featureList.Count);
            Assert.AreEqual(4, featureList[0].Qualifiers.Count);
            Assert.AreEqual(5, featureList[1].Qualifiers.Count);
            Assert.AreEqual(1, featureList[2].Qualifiers.Count);

            // test the sequence string
            string expected = @"GATCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCCGACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCTGCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGAACCGCCAATAGACAACATATGTAACATATTTAGGATATACCTCGAAAATAATAAACCGCCACACTGTCATTATTATAATTAGAAACAGAACGCAAAAATTATCCACTATATAATTCAAAGACGCGAAAAAAAAAGAACAACGCGTCATAGAACTTTTGGCAATTCGCGTCACAAATAAATTTTGGCAACTTATGTTTCCTCTTCGAGCAGTACTCGAGCCCTGTCTCAAGAATGTAATAATACCCATCGTAGGTATGGTTAAAGATAGCATCTCCACAACCTCAAAGCTCCTTGCCGAGAGTCGCCCTCCTTTGTCGAGTAATTTTCACTTTTCATATGAGAACTTATTTTCTTATTCTTTACTCTCACATCCTGTAGTGATTGACACTGCAACAGCCACCATCACTAGAAGAACAGAACAATTACTTAATAGAAAAATTATATCTTCCTCGAAACGATTTCCTGCTTCCAACATCTACGTATATCAAGAAGCATTCACTTACCATGACACAGCTTCAGATTTCATTATTGCTGACAGCTACTATATCACTACTCCATCTAGTAGTGGCCACGCCCTATGAGGCATATCCTATCGGAAAACAATACCCCCCAGTGGCAAGAGTCAATGAATCGTTTACATTTCAAATTTCCAATGATACCTATAAATCGTCTGTAGACAAGACAGCTCAAATAACATACAATTGCTTCGACTTACCGAGCTGGCTTTCGTTTGACTCTAGTTCTAGAACGTTCTCAGGTGAACCTTCTTCTGACTTACTATCTGATGCGAACACCACGTTGTATTTCAATGTAATACTCGAGGGTACGGACTCTGCCGACAGCACGTCTTTGAACAATACATACCAATTTGTTGTTACAAACCGTCCATCCATCTCGCTATCGTCAGATTTCAATCTATTGGCGTTGTTAAAAAACTATGGTTATACTAACGGCAAAAACGCTCTGAAACTAGATCCTAATGAAGTCTTCAACGTGACTTTTGACCGTTCAATGTTCACTAACGAAGAATCCATTGTGTCGTATTACGGACGTTCTCAGTTGTATAATGCGCCGTTACCCAATTGGCTGTTCTTCGATTCTGGCGAGTTGAAGTTTACTGGGACGGCACCGGTGATAAACTCGGCGATTGCTCCAGAAACAAGCTACAGTTTTGTCATCATCGCTACAGACATTGAAGGATTTTCTGCCGTTGAGGTAGAATTCGAATTAGTCATCGGGGCTCACCAGTTAACTACCTCTATTCAAAATAGTTTGATAATCAACGTTACTGACACAGGTAACGTTTCATATGACTTACCTCTAAACTATGTTTATCTCGATGACGATCCTATTTCTTCTGATAAATTGGGTTCTATAAACTTATTGGATGCTCCAGACTGGGTGGCATTAGATAATGCTACCATTTCCGGGTCTGTCCCAGATGAATTACTCGGTAAGAACTCCAATCCTGCCAATTTTTCTGTGTCCATTTATGATACTTATGGTGATGTGATTTATTTCAACTTCGAAGTTGTCTCCACAACGGATTTGTTTGCCATTAGTTCTCTTCCCAATATTAACGCTACAAGGGGTGAATGGTTCTCCTACTATTTTTTGCCTTCTCAGTTTACAGACTACGTGAATACAAACGTTTCATTAGAGTTTACTAATTCAAGCCAAGACCATGACTGGGTGAAATTCCAATCATCTAATTTAACATTAGCTGGAGAAGTGCCCAAGAATTTCGACAAGCTTTCATTAGGTTTGAAAGCGAACCAAGGTTCACAATCTCAAGAGCTATATTTTAACATCATTGGCATGGATTCAAAGATAACTCACTCAAACCACAGTGCGAATGCAACGTCCACAAGAAGTTCTCACCACTCCACCTCAACAAGTTCTTACACATCTTCTACTTACACTGCAAAAATTTCTTCTACCTCCGCTGCTGCTACTTCTTCTGCTCCAGCAGCGCTGCCAGCAGCCAATAAAACTTCATCTCACAATAAAAAAGCAGTAGCAATTGCGTGCGGTGTTGCTATCCCATTAGGCGTTATCCTAGTAGCTCTCATTTGCTTCCTAATATTCTGGAGACGCAGAAGGGAAAATCCAGACGATGAAAACTTACCGCATGCTATTAGTGGACCTGATTTGAATAATCCTGCAAATAAACCAAATCAAGAAAACGCTACACCTTTGAACAACCCCTTTGATGATGATGCTTCCTCGTACGATGATACTTCAATAGCAAGAAGATTGGCTGCTTTGAACACTTTGAAATTGGATAACCACTCTGCCACTGAATCTGATATTTCCAGCGTGGATGAAAAGAGAGATTCTCTATCAGGTATGAATACATACAATGATCAGTTCCAATCCCAAAGTAAAGAAGAATTATTAGCAAAACCCCCAGTACAGCCTCCAGAGAGCCCGTTCTTTGACCCACAGAATAGGTCTTCTTCTGTGTATATGGATAGTGAACCAGCAGTAAATAAATCCTGGCGATATACTGGCAACCTGTCACCAGTCTCTGATATTGTCAGAGACAGTTACGGATCACAAAAAACTGTTGATACAGAAAAACTTTTCGATTTAGAAGCACCAGAGAAGGAAAAACGTACGTCAAGGGATGTCACTATGTCTTCACTGGACCCTTGGAACAGCAATATTAGCCCTTCTCCCGTAAGAAAATCAGTAACACCATCACCATATAACGTAACGAAGCATCGTAACCGCCACTTACAAAATATTCAAGACTCTCAAAGCGGTAAAAACGGAATCACTCCCACAACAATGTCAACTTCATCTTCTGACGATTTTGTTCCGGTTAAAGATGGTGAAAATTTTTGCTGGGTCCATAGCATGGAACCAGACAGAAGACCAAGTAAGAAAAGGTTAGTAGATTTTTCAAATAAGAGTAATGTCAATGTTGGTCAAGTTAAGGACATTCACGGACGCATCCCAGAAATGCTGTGATTATACGCAACGATATTTTGCTTAATTTTATTTTCCTGTTTTATTTTTTATTAGTGGTTTACAGATACCCTATATTTTATTTAGTTTTTATACTTAGAGACATTTAATTTTAATTCCATTCTTCAAATTTCATTTTTGCACTTAAAACAAAGATCCAAAAATGCTCTCGCCCTCTTCATATTGAGAATACACTCCATTCAAAATTTTGTCGTCACCGCTGATTAATTTTTCACTAAACTGATGAATAATCAAAGGCCCCACGTCAGAACCGACTAAAGAAGTGAGTTTTATTTTAGGAGGTTGAAAACCATTATTGTCTGGTAAATTTTCATCTTCTTGACATTTAACCCAGTTTGAATCCCTTTCAATTTCTGCTTTTTCCTCCAAACTATCGACCCTCCTGTTTCTGTCCAACTTATGTCCTAGTTCCAATTCGATCGCATTAATAACTGCTTCAAATGTTATTGTGTCATCGTTGACTTTAGGTAATTTCTCCAAATGCATAATCAAACTATTTAAGGAAGATCGGAATTCGTCGAACACTTCAGTTTCCGTAATGATCTGATCGTCTTTATCCACATGTTGTAATTCACTAAAATCTAAAACGTATTTTTCAATGCATAAATCGTTCTTTTTATTAATAATGCAGATGGAAAATCTGTAAACGTGCGTTAATTTAGAAAGAACATCCAGTATAAGTTCTTCTATATAGTCAATTAAAGCAGGATGCCTATTAATGGGAACGAACTGCGGCAAGTTGAATGACTGGTAAGTAGTGTAGTCGAATGACTGAGGTGGGTATACATTTCTATAAAATAAAATCAAATTAATGTAGCATTTTAAGTATACCCTCAGCCACTTCTCTACCCATCTATTCATAAAGCTGACGCAACGATTACTATTTTTTTTTTCTTCTTGGATCTCAGTCGTCGCAAAAACGTATACCTTCTTTTTCCGACCTTTTTTTTAGCTTTCTGGAAAAGTTTATATTAGTTAAACAGGGTCTAGTCTTAGTGTGAAAGCTAGTGGTTTCGATTGACTGATATTAAGAAAGTGGAAATTAAATTAGTAGTGTAGACGTATATGCATATGTATTTCTCGCCTGTTTATGTTTCTACGTACTTTTGATTTATAGCAAGGGGAAAAGAAATACATACTATTTTTTGGTAAAGGTGAAAGCATAATGTAAAAGCTAGAATAAAATGGACGAAATAAAGAGAGGCTTAGTTCATCTTTTTTCCAAAAAGCACCCAATGATAATAACTAAAATGAAAAGGATTTGCCATCTGTCAGCAACATCAGTTGTGTGAGCAATAATAAAATCATCACCTCCGTTGCCTTTAGCGCGTTTGTCGTTTGTATCTTCCGTAATTTTAGTCTTATCAATGGGAATCATAAATTTTCCAATGAATTAGCAATTTCGTCCAATTCTTTTTGAGCTTCTTCATATTTGCTTTGGAATTCTTCGCACTTCTTTTCCCATTCATCTCTTTCTTCTTCCAAAGCAACGATCCTTCTACCCATTTGCTCAGAGTTCAAATCGGCCTCTTTCAGTTTATCCATTGCTTCCTTCAGTTTGGCTTCACTGTCTTCTAGCTGTTGTTCTAGATCCTGGTTTTTCTTGGTGTAGTTCTCATTATTAGATCTCAAGTTATTGGAGTCTTCAGCCAATTGCTTTGTATCAGACAATTGACTCTCTAACTTCTCCACTTCACTGTCGAGTTGCTCGTTTTTAGCGGACAAAGATTTAATCTCGTTTTCTTTTTCAGTGTTAGATTGCTCTAATTCTTTGAGCTGTTCTCTCAGCTCCTCATATTTTTCTTGCCATGACTCAGATTCTAATTTTAAGCTATTCAATTTCTCTTTGATC";

            Assert.AreEqual(expected, seq.ToString());

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string             actual    = formatter.FormatString(seq);

            // test the formatting
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
        }
Пример #8
0
        public void ValidateBasicDerivedSequenceWithGenBankFormat()
        {
            // Gets the expected sequence from the Xml
            string expectedSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.ExpectedSequenceNode);
            string geneBankFilePath = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.FilePathNode);

            Assert.IsTrue(File.Exists(geneBankFilePath));

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Concat(
                                         "Sequence BVT: Sequence BVT: The File exist in the Path ", geneBankFilePath));

            // Parse a GenBank file Using Parse method and convert the same to sequence.
            ISequenceParser parser = new GenBankParser();

            IList <ISequence> sequence = parser.Parse(geneBankFilePath);

            Assert.IsNotNull(sequence);
            Sequence geneBankSeq = (Sequence)sequence[0];

            Assert.IsNotNull(geneBankSeq);
            Assert.AreEqual(expectedSequence, geneBankSeq.ToString());
            ApplicationLog.WriteLine(string.Concat(
                                         "Sequence BVT: The GenBank Sequence is as expected."));

            byte[] tmpEncodedSeq = new byte[geneBankSeq.Count];
            (geneBankSeq as IList <byte>).CopyTo(tmpEncodedSeq, 0);
            Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);
            ApplicationLog.WriteLine(string.Concat(
                                         "Sequence BVT: The GenBank Sequence Length is as expected."));

            // Create a derived Sequences for the fastA file sequence.
            BasicDerivedSequence genebankDerivedSeq =
                new BasicDerivedSequence(geneBankSeq, false, false, -1, -1);

            // validate the DerivedSequence with originalSequence.
            Assert.IsNotNull(genebankDerivedSeq);
            Assert.AreEqual(expectedSequence, genebankDerivedSeq.ToString());
            Assert.AreEqual(geneBankSeq.ToString(), genebankDerivedSeq.ToString());
            ApplicationLog.WriteLine(string.Concat(
                                         "Sequence BVT: The BasicDerived Sequence is as expected."));

            // Logs to Nunit GUI.
            Console.WriteLine(
                "Sequence BVT: Validation of GenBank file Sequence is completed successfully.");
        }
Пример #9
0
        public void ValidateSequenceInsertWithGenBankFormat()
        {
            // Gets the expected sequence from the Xml
            string expectedSequenceCount = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.SimpleFastaSequenceCount);
            string geneBankFilePath = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.FilePathNode);
            string expectedSeqAfterAdd = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGeneBankNodeName, Constants.ExpectedSeqAfterAdd);
            string seqAfterAdd = string.Empty;

            string alphabetName = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.AlphabetNameNode);
            string actualSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleDnaAlphabetNode, Constants.ExpectedSingleChar);
            string seqBeforeAdding = string.Empty;

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "Sequence BVT: Sequence {0} is expected.", actualSequence, alphabetName));

            Sequence seqItem = new Sequence(Utility.GetAlphabet(alphabetName), "TCGN");

            Assert.IsTrue(File.Exists(geneBankFilePath));

            // Logs information to the log file
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "Sequence BVT: The File exist in the Path {0}.", geneBankFilePath));

            ISequenceParser parser = new GenBankParser();

            // Parse a FastA file Using Parse method and convert the same to sequence.
            IList <ISequence> sequence = parser.Parse(geneBankFilePath);
            Sequence          Seq      = (Sequence)sequence[0];

            Seq.IsReadOnly  = false;
            seqBeforeAdding = Seq.ToString();
            Seq.Add(seqItem[0]);

            // Validate sequence list after adding sequence item to the sequence list.
            seqAfterAdd = Seq.ToString();
            Assert.AreEqual(seqAfterAdd, expectedSeqAfterAdd);
            Assert.AreNotEqual(seqAfterAdd, seqBeforeAdding);
            Assert.AreEqual(Seq.Count.ToString((IFormatProvider)null), expectedSequenceCount);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                                                   "Sequence BVT: Sequence {0} is expected.", seqAfterAdd));
        }
Пример #10
0
        public void GenBankFeatures()
        {
            // parse
            ISequenceParser       parser   = new GenBankParser(_singleProteinSeqGenBankFilename);
            ISequence             seq      = parser.Parse().FirstOrDefault();
            GenBankMetadata       metadata = seq.Metadata["GenBank"] as GenBankMetadata;
            List <CodingSequence> CDS      = metadata.Features.CodingSequences;

            Assert.AreEqual(CDS.Count, 3);
            Assert.AreEqual(CDS[0].DatabaseCrossReference.Count, 1);
            Assert.AreEqual(CDS[0].GeneSymbol, string.Empty);
            Assert.AreEqual(metadata.Features.GetFeatures("source").Count, 1);
            Assert.IsFalse(CDS[0].Pseudo);
            Assert.AreEqual(metadata.GetFeatures(1, 109).Count, 2);
            Assert.AreEqual(metadata.GetFeatures(1, 10).Count, 2);
            Assert.AreEqual(metadata.GetFeatures(10, 100).Count, 2);
            Assert.AreEqual(metadata.GetFeatures(120, 150).Count, 2);
            Assert.AreEqual(metadata.GetCitationsReferredInFeatures().Count, 0);
            parser.Close();
            parser.Dispose();
            ISequenceParser parser1 = new GenBankParser(_genBankDataPath + @"\NC_001284.gbk");
            ISequence       seq1    = parser1.Parse().FirstOrDefault();

            metadata = seq1.Metadata["GenBank"] as GenBankMetadata;
            Assert.AreEqual(metadata.Features.All.Count, 743);
            Assert.AreEqual(metadata.Features.CodingSequences.Count, 117);
            Assert.AreEqual(metadata.Features.Exons.Count, 32);
            Assert.AreEqual(metadata.Features.Introns.Count, 22);
            Assert.AreEqual(metadata.Features.Genes.Count, 60);
            Assert.AreEqual(metadata.Features.MiscFeatures.Count, 455);
            Assert.AreEqual(metadata.Features.Promoters.Count, 17);
            Assert.AreEqual(metadata.Features.TransferRNAs.Count, 21);
            Assert.AreEqual(metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count, 117);
            Assert.AreEqual(metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count, 117);
            ISequence seqTemp = metadata.Features.CodingSequences[0].GetTranslation();

            byte[] tempData = new byte[seqTemp.Count];
            for (int i = 0; i < seqTemp.Count; i++)
            {
                tempData[i] = seqTemp[i];
            }
            string sequenceInString = ASCIIEncoding.ASCII.GetString(tempData);

            Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), sequenceInString.Trim('"'));
            Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
        }
Пример #11
0
        public void TestGenBankWhenUserSetsDnaAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser();

            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.ParseOne(_singleDnaSeqGenBankFilename);

            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string             actual    = formatter.FormatString(seq);

            // test the formatting
            Assert.AreEqual(_singleDnaSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
        }
Пример #12
0
        void InvalidateGenBankParser(string node)
        {
            // Initialization of xml strings.
            FilePath = utilityObj.xmlUtil.GetTextValue(node,
                                                       Constants.FilePathNode);

            try
            {
                GenBankParser parserObj = new GenBankParser(FilePath);
                if (string.Equals(Constants.SimpleGenBankNodeName, node))
                {
                    parserObj.LocationBuilder = null;
                }
                else if (string.Equals(Constants.SimpleGenBankPrimaryNode, node))
                {
                    parserObj.Alphabet = Alphabets.RNA;
                }

                //parserObj.ParseOne(FilePath);
                parserObj.Parse();
                Assert.Fail();
            }
            catch (InvalidOperationException)
            {
                ApplicationLog.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
                Console.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
            }
            catch (InvalidDataException)
            {
                ApplicationLog.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
                Console.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
            }
            catch (Exception)
            {
                ApplicationLog.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
                Console.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
            }
        }
Пример #13
0
        public void TestGenBankFailureWhenParsingEmpty()
        {
            bool failed = false;

            try
            {
                ISequenceParser parser = new GenBankParser();
                parser.Parse();
                failed = true;
            }
            catch (Exception)
            {
                // all is well with the world
            }
            if (failed)
            {
                Assert.Fail("Failed to throw exception for calling ParseOne on reader containing empty string.");
            }
        }
Пример #14
0
        /// <summary>
        /// Finds a suitable parser that supports the specified file, opens the file and returns the parser.
        /// </summary>
        /// <param name="fileName">File name for which the parser is required.</param>
        /// <returns>If found returns the open parser as ISequenceParser else returns null.</returns>
        public static ISequenceParser FindParserByFileName(string fileName)
        {
            ISequenceParser parser = null;

            if (!string.IsNullOrEmpty(fileName))
            {
                if (IsFasta(fileName))
                {
                    parser = new FastAParser(fileName);
                }
                else if (IsFastQ(fileName))
                {
                    parser = new FastQParser(fileName);
                }
                else if (IsGenBank(fileName))
                {
                    parser = new GenBankParser(fileName);
                }
                else if (fileName.EndsWith(Properties.Resource.GFF_FILEEXTENSION, StringComparison.InvariantCultureIgnoreCase))
                {
                    parser = new GffParser(fileName);
                }
                else
                {
                    // Do a search through the known parsers to pick up custom parsers added through add-in.
                    string fileExtension = Path.GetExtension(fileName);
                    if (!string.IsNullOrEmpty(fileExtension))
                    {
                        parser = All.FirstOrDefault(p => p.SupportedFileTypes.Contains(fileExtension));
                        // If we found a match based on extension, then open the file - this
                        // matches the above behavior where a specific parser was created for
                        // the passed filename - the parser is opened automatically in the constructor.
                        if (parser != null)
                        {
                            parser.Open(fileName);
                        }
                    }
                }
            }

            return(parser);
        }
Пример #15
0
        public void TestGenBankForManyFiles()
        {
            // parser and formatter will be used for all files in input dir

            // iterate through the files in input dir, parsing and formatting each; write results
            // to log file
            DirectoryInfo inputDirInfo = new DirectoryInfo(_genBankDataPath);

            foreach (FileInfo fileInfo in inputDirInfo.GetFiles("*.gbk"))
            {
                ApplicationLog.WriteLine("Parsing file {0}...{1}", fileInfo.FullName, Environment.NewLine);
                ISequenceParser parser = new GenBankParser(fileInfo.FullName);


                try
                {
                    IEnumerable <ISequence> seqList = parser.Parse();
                    // don't do anything with it; just make sure it doesn't crash
                    ISequenceFormatter formatter = new GenBankFormatter(TempGenBankFileName);
                    string             actual    = string.Empty;

                    (formatter as GenBankFormatter).Write(seqList.ToList());

                    using (StreamReader reader = new StreamReader(TempGenBankFileName))
                    {
                        actual = reader.ReadToEnd();
                    }
                    File.Delete(TempGenBankFileName);
                    parser.Close();
                    parser.Dispose();

                    ApplicationLog.WriteLine("Parse completed successfully." + Environment.NewLine);
                }
                catch (Exception e)
                {
                    ApplicationLog.WriteLine("Parse failed:" + Environment.NewLine + e + Environment.NewLine);
                    throw;
                }
            }
        }
Пример #16
0
        public void TestGenBankWhenUserSetsIncorrectAlphabet()
        {
            // parse
            ISequenceParser parser = new GenBankParser();

            parser.Alphabet = Alphabets.Protein;
            bool failed = false;

            try
            {
                ISequence seq = parser.ParseOne(_singleDnaSeqGenBankFilename);
                failed = true;
            }
            catch (Exception)
            {
                // all is well with the world
            }
            if (failed)
            {
                Assert.Fail("Failed to throw exception for trying to create sequence using incorrect alphabet.");
            }
        }
Пример #17
0
        public void GenBankParserValidateParseFileName()
        {
            InitializeXmlVariables();

            // parse
            ISequenceParser parserObj = new GenBankParser();
            {
                IEnumerable <ISequence> seqList = parserObj.Parse(FilePath);
                ISequence seq = seqList.ElementAt(0);
                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                Assert.AreEqual(SeqId, seq.ID);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                var metadata = (GenBankMetadata)seq.Metadata["GenBank"];
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType,
                                    metadata.Locus.Strand.ToString());
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                metadata.Locus.Date);

                Assert.AreEqual(Version, metadata.Version.Version.ToString(null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, seq.ConvertToString());

                ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the Sequence");
            }
        }
Пример #18
0
        /// <summary>
        /// Returns parser which supports the specified file.
        /// </summary>
        /// <param name="fileName">File name for which the parser is required.</param>
        /// <param name="parserName">Name of the parser to use.</param>
        /// <returns>If found returns the open parser as ISequenceParser else returns null.</returns>
        public static ISequenceParser FindParserByName(string fileName, string parserName)
        {
            ISequenceParser parser = null;

            if (!string.IsNullOrEmpty(fileName) && !string.IsNullOrEmpty(parserName))
            {
                if (parserName == Properties.Resource.FastAName)
                {
                    parser = new FastAParser(fileName);
                }
                else if (parserName == Properties.Resource.FastQName)
                {
                    parser = new FastQParser(fileName);
                }
                else if (parserName == Properties.Resource.GENBANK_NAME)
                {
                    parser = new GenBankParser(fileName);
                }
                else if (parserName == Properties.Resource.GFF_NAME)
                {
                    parser = new GffParser(fileName);
                }
                else
                {
                    // Do a search through the known parsers to pick up custom parsers added through add-in.
                    parser = All.FirstOrDefault(p => p.Name == parserName);
                    // If we found a match based on extension, then open the file - this
                    // matches the above behavior where a specific parser was created for
                    // the passed filename - the parser is opened automatically in the constructor.
                    if (parser != null)
                    {
                        parser.Open(fileName);
                    }
                }
            }

            return(parser);
        }
Пример #19
0
        public void GenBankFormatterValidateReadAndWriteMultipleDBLinks()
        {
            // Create a Sequence with all attributes.
            // parse and update the properties instead of parsing entire file.
            string          tempFileName = Path.GetTempFileName();
            ISequenceParser parser1      = new GenBankParser();

            using (parser1.Open(_genBankFile_WithMultipleDBLines))
            {
                var orgSeq = parser1.Parse().First();
                ISequenceFormatter formatter = new GenBankFormatter();
                using (formatter.Open(tempFileName))
                {
                    formatter.Format(orgSeq);
                    formatter.Close();
                }
            }
            var same = Utility.CompareFiles(tempFileName, _genBankFile_WithMultipleDBLines);

            File.Delete(tempFileName);
            Assert.IsTrue(same);
            ApplicationLog.WriteLine("GenBank Formatter: Successful read->write loop");
        }
Пример #20
0
        public void GenBankParserValidateParseOneWithSpecificFormats()
        {
            InitializeXmlVariables();
            // Initialization of xml strings.
            FilePath = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.FilePathNode);
            AlphabetName = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.AlphabetNameNode);
            SeqId = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.SequenceIdNode);
            StrandTopology = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.StrandTopologyNode);
            StrandType = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.StrandTypeNode);
            Div = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.DivisionNode);
            Version = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.VersionNode);
            SequenceDate = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.DateNode);
            PrimaryId = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.PrimaryIdNode);
            ExpectedSequence = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.ExpectedSequenceNode);

            // parse
            using (ISequenceParser parserObj = new GenBankParser(FilePath))
            {
                parserObj.Alphabet = Alphabets.Protein;
                IEnumerable <ISequence> seq = parserObj.Parse();

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName),
                                seq.ElementAt(0).Alphabet);
                Assert.AreEqual(SeqId, seq.ElementAt(0).ID);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                GenBankMetadata metadata = (GenBankMetadata)seq.ElementAt(0).Metadata["GenBank"];
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType,
                                    metadata.Locus.Strand.ToString());
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                metadata.Locus.StrandTopology.ToString().ToUpper(
                                    CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, new string(seq.ElementAt(0).Select(a => (char)a).ToArray()));
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Sequence");
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                                ExpectedSequence));
            }
        }
Пример #21
0
 public void TestGenBankLocusTokenParser()
 {
     // parse
     ISequenceParser parser = new GenBankParser();
     ISequence       seq    = parser.ParseOne(_genBankFile_LocusTokenParserTest);
 }
Пример #22
0
        public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
        {
            string queryName = rec.DisplayID.ToString().Split(' ')[0];

            // BLAST reports are saved in individual files by query and
            // numbered in the same order as they appear in the input FASTA file.
            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";

            if (!File.Exists(blastFile))
            {
                throw new Exception("File does not exist.");
            }
            BlastXmlParser      blastParser  = new BlastXmlParser();
            IList <BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser       gbParser     = new GenBankParser();

            int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    int hitsProcessed = 0;
                    // If there are not hits in the BLAST result ...
                    int rank = 0;
                    if (record.Hits.Count() > 0)
                    {
                        // For each hit
                        for (int i = 0; i < record.Hits.Count(); i++)
                        {
                            Hit blastHit = record.Hits[i];
                            // For each HSP
                            for (int j = 0; j < blastHit.Hsps.Count(); j++)
                            {
                                Hsp    blastHsp      = blastHit.Hsps[j];
                                double percentId     = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                                double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;
                                string txt           = String.Format("{0} {1} {2} {3} {4} {5} {6} {7}", percentId, Up.BlastMinPercentIdentity,
                                                                     Up.BlastMaxEvalue, blastHsp.EValue, queryCoverage, Up.BlastMinPercentQueryCoverage,
                                                                     hitsProcessed, Up.BlastMaxNumHits);
                                // if HSP passes user-defined thresholds
                                if ((percentId >= Up.BlastMinPercentIdentity) &&
                                    (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                    (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                    (hitsProcessed < Up.BlastMaxNumHits))
                                {
                                    rank += 1;
                                    string nextScore = "no";
                                    if ((i + 1) < record.Hits.Count())
                                    {
                                        if (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score)
                                        {
                                            nextScore = "less than";
                                        }
                                        else
                                        {
                                            nextScore = "equal";
                                        }
                                    }
                                    else
                                    {
                                        nextScore = "non existent";
                                    }

                                    // parse GI numner from hit
                                    long        gi     = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                                    GenBankItem gitem  = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                                    string      gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                                    gbFile += "_" + gitem.HitStart.ToString();
                                    gbFile += "_" + gitem.HitEnd.ToString();
                                    gbFile += ".gb";
                                    // init item
                                    string   img          = "#" + itemId.ToString();
                                    Item     item         = new Item(itemId, img);
                                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                                    item.Name        = headerTokens[0];
                                    item.Description = headerTokens[1];

                                    // write pairwise alignment
                                    writePairwiseAlignment(Up, blastHit, j, itemId);

                                    // try to parse the GB record associated with the hit and set facet values to data from BLAST/GB record
                                    try
                                    {
                                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                                        GenBankMetadata     gbMeta   = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                        CodingSequence      bestCds  = null;
                                        IList <FeatureItem> features = gbMeta.Features.All;
                                        FeatureItem         bestItem = getBestFeatureItem(features);


                                        if (gbMeta.Features.CodingSequences.Count > 0)
                                        {
                                            bestCds = gbMeta.Features.CodingSequences[0];
                                        }

                                        for (int k = 1; k < gbMeta.Features.CodingSequences.Count; k++)
                                        {
                                            CodingSequence cds = gbMeta.Features.CodingSequences[k];
                                            //int bestSize = Math.Abs(bestCds.Location.End - bestCds.Location.Start);
                                            int bestSize = Math.Abs(bestItem.Location.End - bestItem.Location.Start);
                                            int cdsSize  = Math.Abs(cds.Location.End - cds.Location.Start);
                                            if (cdsSize > bestSize)
                                            {
                                                bestCds = cds;
                                            }
                                        }
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                            case "InputOrder":
                                                facet = new Facet(f.Name, f.Type, seqPos);
                                                break;

                                            case "QuerySequence":
                                                facet = new Facet(f.Name, f.Type, rec.ToString());
                                                break;

                                            case "NextScore":
                                                facet = new Facet(f.Name, f.Type, nextScore);
                                                break;

                                            case "Annotated":
                                                string value = "na";
                                                if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                {
                                                    value = "top_annotated";
                                                }
                                                else
                                                {
                                                    if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                    {
                                                        value = "top_unannotated";
                                                    }
                                                    else
                                                    {
                                                        if (bestItem != null)
                                                        {
                                                            value = "annotated";
                                                        }
                                                        else
                                                        {
                                                            value = "unannotated";
                                                        }
                                                    }
                                                }
                                                facet = new Facet(f.Name, f.Type, value);
                                                break;

                                            default:
                                                //facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestCds, rank);
                                                facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                                break;
                                            }

                                            /*
                                             * if (f.Name == "InputOrder")
                                             * {
                                             *  facet = new Facet(f.Name, f.Type, seqPos);
                                             * }
                                             *
                                             * else
                                             * {
                                             *  facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item);
                                             * }
                                             */
                                            item.Facets.Add(facet);
                                        }
                                    }
                                    //catch (System.NullReferenceException e) // if parsing failed init the item w/ default values (similar to 'no hit' above)
                                    catch
                                    {
                                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        item.Href = "#";
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                            case ("InputOrder"):
                                                facet = new Facet(f.Name, f.Type, seqPos);
                                                break;

                                            case "QuerySequence":
                                                facet = new Facet(f.Name, f.Type, rec.ToString());
                                                break;

                                            case ("NextScore"):
                                                facet = new Facet(f.Name, f.Type, "no");
                                                break;

                                            case "Annotated":
                                                string value = "na";
                                                if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                {
                                                    value = "top_annotated";
                                                }
                                                else
                                                {
                                                    if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                    {
                                                        value = "top_unannotated";
                                                    }
                                                    else
                                                    {
                                                        value = "unannotated";
                                                    }
                                                }
                                                facet = new Facet(f.Name, f.Type, value);
                                                break;

                                            default:
                                                facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                                break;
                                            }
                                            item.Facets.Add(facet);
                                        }
                                        //throw (e);
                                    }
                                    // Add item to collection, increment to next item,
                                    collection.Items.Add(item);
                                    hitsProcessed += 1;
                                    itemId        += 1;
                                }
                            }
                        }
                    }
                    if ((record.Hits.Count()) == 0 || (hitsProcessed == 0))
                    {
                        // Init Pivot item
                        string img  = "#" + itemId.ToString();
                        Item   item = new Item(itemId, img);
                        item.Href = "#";
                        string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                        item.Name        = headerTokens[0];
                        item.Description = headerTokens[1];

                        // Write pairwise alignment to file.
                        writePairwiseAlignment(Up, itemId);

                        // Set facet values for each facet category to default values
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet = new Facet();
                            switch (f.Name)
                            {
                            case ("InputOrder"):
                                facet = new Facet(f.Name, f.Type, seqPos);
                                break;

                            case ("QuerySequence"):
                                facet = new Facet(f.Name, f.Type, rec.ToString());
                                break;

                            default:
                                facet = CreateFacet(f.Name, f.Type, record, item, 0);
                                break;
                            }
                            item.Facets.Add(facet);
                        }

                        // Add item to collection, increment to next item, skip remaining code
                        collection.Items.Add(item);
                        itemId        += 1;
                        hitsProcessed += 1;
                    }
                }
            }
            return(itemId);
        }
Пример #23
0
        public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
        {
            // BLAST reports are saved in individual files by query and
            // numbered in the same order as they appear in the input FASTA file.
            int[] annotatedIndex = new int[2];
            annotatedIndex[0] = -1;
            annotatedIndex[1] = -1;

            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";

            if (!File.Exists(blastFile))
            {
                throw new Exception("File does not exist.");
            }
            BlastXmlParser      blastParser  = new BlastXmlParser();
            IList <BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser       gbParser     = new GenBankParser();

            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    int hitsProcessed = 0;
                    // If there are not hits in the BLAST result ...
                    int rank = 0;
                    if (record.Hits.Count() > 0)
                    {
                        // For each hit
                        for (int i = 0; i < record.Hits.Count(); i++)
                        {
                            Hit blastHit = record.Hits[i];
                            for (int j = 0; j < blastHit.Hsps.Count(); j++)
                            {
                                Hsp    blastHsp      = blastHit.Hsps[j];
                                double percentId     = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                                double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                                if ((percentId >= Up.BlastMinPercentIdentity) &&
                                    (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                    (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                    (hitsProcessed < Up.BlastMaxNumHits))
                                {
                                    rank += 1;
                                    long        gi     = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                                    GenBankItem gitem  = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                                    string      gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                                    gbFile += "_" + gitem.HitStart.ToString();
                                    gbFile += "_" + gitem.HitEnd.ToString();
                                    gbFile += ".gb";

                                    try
                                    {
                                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        ISequence           gbRecord = gbParser.ParseOne(gbFile);
                                        GenBankMetadata     gbMeta   = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                        IList <FeatureItem> features = gbMeta.Features.All;
                                        FeatureItem         bestItem = getBestFeatureItem(features);
                                        if (bestItem != null)
                                        {
                                            annotatedIndex[0] = i;
                                            annotatedIndex[1] = j;
                                            return(annotatedIndex);
                                        }
                                    }

                                    catch
                                    {
                                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                    }
                                    hitsProcessed += 1;
                                }
                            }
                        }
                    }
                }
            }

            return(annotatedIndex);
        }
Пример #24
0
 public void TestGenBankWhenParsingOneOfMany()
 {
     // parse
     ISequenceParser parser = new GenBankParser();
     ISequence       seq    = parser.ParseOne(_multipleSeqGenBankFilename);
 }
Пример #25
0
        private void DoGenBank()
        {
            int progValue = 0;
            Dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Normal, new Action(delegate()
            {
                (CurrentControl as UserControl5).UserControl5Step1.Foreground = System.Windows.Media.Brushes.Black;
                progressBar1.Value = progValue;
            }));

            string inputDir = Up.ProjectDir + "\\xml";
            if (!Directory.Exists(inputDir))
            {
                throw new Exception("Directory " + inputDir + " does not exist.");
            }
            string[] blastXmlFiles = Directory.GetFiles(inputDir, "*.xml");
            int c = 1;
            Stack<GenBankItem> giList = new Stack<GenBankItem>();
            foreach (string blastFile in blastXmlFiles)
            {
                BlastXmlParser blastParser = new BlastXmlParser();
                progValue = Convert.ToInt32(Math.Round((double)c / blastXmlFiles.Count() * 100, 0));
                UpdateProgressBar(progValue, "Filtering results");
                try
                {
                    IList<BlastResult> blastResults = blastParser.Parse(blastFile);

                    List<GenBankItem> recordGiList = BlastUtil.filter(blastResults, Up.BlastMaxNumHits, Up.BlastMaxEvalue, Up.BlastMinPercentIdentity, Up.BlastMinPercentQueryCoverage);
                    foreach (GenBankItem gi in recordGiList)
                    {
                        giList.Push(gi);
                        Debug.WriteLine(gi.HitStart.ToString() + " " + gi.HitEnd.ToString());
                    }
                }
                catch
                {
                    FatalErrorDialog("Cannot parse " + blastFile);
                    Debug.WriteLine("Cannot parse " + blastFile);
                }
                c += 1;
            }
            progValue = 0;
            Dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Normal, new Action(delegate()
            {
                (CurrentControl as UserControl5).UserControl5Step2.Foreground = System.Windows.Media.Brushes.Black;
                progressBar1.Value = progValue;
            }));

            int totalGi = giList.Count();
            GenBankParser genkBankParser = new GenBankParser();
            int unParsableCount = 0;
            int notDownloadedCount = 0;
            string unParsableGIs = "";
            string notDownloadedGIs = "";
            bool isConnected = true;
            if (!IsConnectedToInternet())
            {
                isConnected = false;
                MessageBox.Show("Your internet connection appears to be down. As a result, missing GenBank records will not be downloaded.");
            }
            
            while (giList.Count > 0)
            {
                GenBankItem gitem = giList.Pop();
                progValue = Convert.ToInt32(Math.Round(((totalGi - giList.Count()) / (double)totalGi) * 100, 0));
                UpdateProgressBar(progValue, "Downloading GenBank records");
                string outFilename = Up.ProjectDir + "\\gb\\" + gitem.Id;
                outFilename += "_" + gitem.HitStart.ToString();
                outFilename += "_" + gitem.HitEnd.ToString();
                outFilename += ".gb";

                WebClient wc = new WebClient();

                if (File.Exists(outFilename))
                {
                    try
                    {
                        ISequence gpitem = genkBankParser.ParseOne(outFilename);
                    }
                    catch
                    {
                        if (isConnected)
                        {

                            string url = GetGenbankUrl(gitem);
                            try
                            {
                                wc.DownloadFile(url, outFilename);
                                Thread.Sleep(1000);
                            }
                            catch
                            {
                                wc.Proxy = null;
                                giList.Push(gitem);
                            }
                            try
                            {
                                ISequence gpitem = genkBankParser.ParseOne(outFilename);
                            }
                            catch
                            {
                                unParsableCount += 1;
                                unParsableGIs += gitem.Id + ",";
                            }
                        }
                        else
                        {
                            notDownloadedCount += 1;
                            notDownloadedGIs += gitem.Id + ",";
                        }
                    }

                }
                else
                {
                    if (isConnected)
                    {
                        string url = GetGenbankUrl(gitem);
                        try
                        {
                            wc.DownloadFile(url, outFilename);
                            Thread.Sleep(1000);
                        }
                        catch
                        {
                            wc.Proxy = null;
                            giList.Push(gitem);
                        }
                        try
                        {
                            ISequence gpitem = genkBankParser.ParseOne(outFilename);
                        }
                        catch
                        {
                            unParsableCount += 1;
                            unParsableGIs += gitem.Id + ",";
                        }
                    }
                    else
                    {
                        notDownloadedCount += 1;
                        notDownloadedGIs += gitem.Id + ",";
                    }
                }
            }

            if (notDownloadedCount > 0)
            {
                MessageBox.Show("Error downloading GenBank records: " + notDownloadedGIs + ".\r\nThis is likely caused by an interruption in the internet connection. Re-attempt the download by repeating this step.\r\n");
            }
            if (unParsableCount > 0)
            {
                MessageBox.Show("Error parsing GenBank records: " + unParsableGIs + ".\r\nThis is likely due to an unsupported field in the GenBank record. Contact the MBF development team at http://mbf.codeplex.com, and include one of the GI numbers in the bug report.\r\nYou can copy this message to the clipboard using Ctrl-C.\r\n");
            }
        }
Пример #26
0
        public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
        {
            // BLAST reports are saved in individual files by query and 
            // numbered in the same order as they appear in the input FASTA file.
            int[] annotatedIndex = new int[2];
            annotatedIndex[0] = -1;
            annotatedIndex[1] = -1;

            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
            if (!File.Exists(blastFile))
            {
                throw new Exception("File does not exist.");
            }
            BlastXmlParser blastParser = new BlastXmlParser();
            IList<BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser gbParser = new GenBankParser();

            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    int hitsProcessed = 0;
                    // If there are not hits in the BLAST result ...
                    int rank = 0;
                    if (record.Hits.Count() > 0)
                    {
                        // For each hit
                        for (int i = 0; i < record.Hits.Count(); i++)
                        {

                            Hit blastHit = record.Hits[i];
                            for (int j = 0; j < blastHit.Hsps.Count(); j++)
                            {

                                Hsp blastHsp = blastHit.Hsps[j];
                                double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                                double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;
                                
                                if ((percentId >= Up.BlastMinPercentIdentity) &&
                                    (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                    (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                    (hitsProcessed < Up.BlastMaxNumHits))
                                {
                                    rank += 1;
                                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                                    gbFile += "_" + gitem.HitStart.ToString();
                                    gbFile += "_" + gitem.HitEnd.ToString();
                                    gbFile += ".gb";
                                    
                                    try
                                    {
                                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                        IList<FeatureItem> features = gbMeta.Features.All;
                                        FeatureItem bestItem = getBestFeatureItem(features);
                                        if (bestItem != null)
                                        {
                                            annotatedIndex[0] = i;
                                            annotatedIndex[1] = j;
                                            return annotatedIndex;
                                        }
                                    }
                                    
                                    catch
                                    {
                                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                    }
                                    hitsProcessed += 1;
                                }
                            }
                        }
                    }
                }
            }
            
            return annotatedIndex;
        }
Пример #27
0
        public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
        {
            
            string queryName = rec.DisplayID.ToString().Split(' ')[0];

            // BLAST reports are saved in individual files by query and 
            // numbered in the same order as they appear in the input FASTA file.
            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
            if (!File.Exists(blastFile))
            {
                throw new Exception("File does not exist.");
            }
            BlastXmlParser blastParser = new BlastXmlParser();
            IList<BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser gbParser = new GenBankParser();

            int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);
            
            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    int hitsProcessed = 0;
                    // If there are not hits in the BLAST result ...
                    int rank = 0;
                    if (record.Hits.Count() > 0)
                    {
                        // For each hit
                        for (int i = 0; i < record.Hits.Count(); i++)
                        {

                            Hit blastHit = record.Hits[i];
                            // For each HSP
                            for (int j = 0; j < blastHit.Hsps.Count(); j++)
                            {
                                
                                Hsp blastHsp = blastHit.Hsps[j];
                                double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                                double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;
                                string txt = String.Format("{0} {1} {2} {3} {4} {5} {6} {7}", percentId, Up.BlastMinPercentIdentity,
                                    Up.BlastMaxEvalue, blastHsp.EValue, queryCoverage, Up.BlastMinPercentQueryCoverage,
                                    hitsProcessed, Up.BlastMaxNumHits);
                                // if HSP passes user-defined thresholds
                                if ((percentId >= Up.BlastMinPercentIdentity) &&
                                    (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                    (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                    (hitsProcessed < Up.BlastMaxNumHits))
                                {
                                    rank += 1;
                                    string nextScore = "no";
                                    if ((i + 1) < record.Hits.Count())
                                    {
                                        if (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score)
                                        {
                                            nextScore = "less than";
                                        }
                                        else
                                        {
                                            nextScore = "equal";
                                        }
                                    }
                                    else
                                    {
                                        nextScore = "non existent";
                                    }

                                    // parse GI numner from hit
                                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                                    gbFile += "_" + gitem.HitStart.ToString();
                                    gbFile += "_" + gitem.HitEnd.ToString();
                                    gbFile += ".gb";
                                    // init item
                                    string img = "#" + itemId.ToString();
                                    Item item = new Item(itemId, img);
                                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                                    item.Name = headerTokens[0];
                                    item.Description = headerTokens[1];

                                    // write pairwise alignment
                                    writePairwiseAlignment(Up, blastHit, j, itemId);

                                    // try to parse the GB record associated with the hit and set facet values to data from BLAST/GB record
                                    try
                                    {
                                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                        CodingSequence bestCds = null;
                                        IList<FeatureItem> features = gbMeta.Features.All;
                                        FeatureItem bestItem = getBestFeatureItem(features);

                                        
                                        if (gbMeta.Features.CodingSequences.Count > 0)
                                        {
                                            bestCds = gbMeta.Features.CodingSequences[0];
                                        }
                                        
                                        for (int k = 1; k < gbMeta.Features.CodingSequences.Count; k++)
                                        {
                                            CodingSequence cds = gbMeta.Features.CodingSequences[k];
                                            //int bestSize = Math.Abs(bestCds.Location.End - bestCds.Location.Start);
                                            int bestSize = Math.Abs(bestItem.Location.End - bestItem.Location.Start);
                                            int cdsSize = Math.Abs(cds.Location.End - cds.Location.Start);
                                            if (cdsSize > bestSize)
                                            {
                                                bestCds = cds;
                                            }
                                        }
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                                case "InputOrder":
                                                    facet = new Facet(f.Name, f.Type, seqPos);
                                                    break;
                                                case "QuerySequence":
                                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                                    break;
                                                case "NextScore":
                                                    facet = new Facet(f.Name, f.Type, nextScore);
                                                    break;
                                                case "Annotated":
                                                    string value = "na";
                                                    if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                    {
                                                        value = "top_annotated";
                                                    }
                                                    else
                                                    {
                                                        if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                        {
                                                            value = "top_unannotated";
                                                        }else{
                                                            if (bestItem != null)
                                                            {
                                                                value = "annotated";
                                                            }else{
                                                                value = "unannotated";
                                                            }
                                                        }
                                                    }
                                                    facet = new Facet(f.Name, f.Type, value);
                                                    break;
                                                default:
                                                    //facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestCds, rank);
                                                    facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                                    break;
                                            }
                                            /*
                                            if (f.Name == "InputOrder")
                                            {
                                                facet = new Facet(f.Name, f.Type, seqPos);
                                            }

                                            else
                                            {
                                                facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item);
                                            }
                                            */
                                            item.Facets.Add(facet);
                                        }

                                    }
                                    //catch (System.NullReferenceException e) // if parsing failed init the item w/ default values (similar to 'no hit' above)
                                    catch
                                    {

                                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        item.Href = "#";
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                                case ("InputOrder"):
                                                    facet = new Facet(f.Name, f.Type, seqPos);
                                                    break;
                                                case "QuerySequence":
                                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                                    break;
                                                case ("NextScore"):
                                                    facet = new Facet(f.Name, f.Type, "no");
                                                    break;
                                                case "Annotated":
                                                    string value = "na";
                                                    if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                    {
                                                        value = "top_annotated";
                                                    }
                                                    else
                                                    {
                                                        if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                        {
                                                            value = "top_unannotated";
                                                        }
                                                        else
                                                        {
                                                            value = "unannotated";
                                                        }
                                                    }
                                                    facet = new Facet(f.Name, f.Type, value);
                                                    break;
                                                default:
                                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                                    break;
                                            }
                                            item.Facets.Add(facet);
                                        }
                                        //throw (e);
                                    }
                                    // Add item to collection, increment to next item, 
                                    collection.Items.Add(item);
                                    hitsProcessed += 1;
                                    itemId += 1;
                                }
                            }
                        }
                    }
                    if ((record.Hits.Count()) == 0 || (hitsProcessed == 0))
                    {
                        // Init Pivot item
                        string img = "#" + itemId.ToString();
                        Item item = new Item(itemId, img);
                        item.Href = "#";
                        string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                        item.Name = headerTokens[0];
                        item.Description = headerTokens[1];

                        // Write pairwise alignment to file.
                        writePairwiseAlignment(Up, itemId);

                        // Set facet values for each facet category to default values
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet = new Facet();
                            switch (f.Name)
                            {
                                case ("InputOrder"):
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case ("QuerySequence"):
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                default:
                                    facet = CreateFacet(f.Name, f.Type, record, item, 0);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }

                        // Add item to collection, increment to next item, skip remaining code
                        collection.Items.Add(item);
                        itemId += 1;
                        hitsProcessed += 1;
                    }
                }
            }
            return itemId;
        }
Пример #28
0
        public void GenBankParserValidateParseOneWithSpecificFormats()
        {
            InitializeXmlVariables();
            // Initialization of xml strings.
            FilePath = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.FilePathNode);
            AlphabetName = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.AlphabetNameNode);
            MolType = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.MoleculeTypeNode);
            IsSequenceReadOnly = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.IsReadOnlyNode);
            SeqId = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.SequenceIdNode);
            StrandTopology = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.StrandTopologyNode);
            StrandType = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.StrandTypeNode);
            Div = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.DivisionNode);
            Version = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.VersionNode);
            SequenceDate = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.DateNode);
            PrimaryId = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.PrimaryIdNode);
            ExpectedSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.ExpectedSequenceNode);

            // parse
            BasicSequenceParser parserObj = new GenBankParser();

            parserObj.Alphabet = Alphabets.Protein;
            parserObj.Encoding = NcbiEAAEncoding.Instance;
            ISequence seq = parserObj.ParseOne(FilePath);

            Assert.AreEqual(Utility.GetAlphabet(AlphabetName),
                            seq.Alphabet);
            Assert.AreEqual(Utility.GetMoleculeType(MolType),
                            seq.MoleculeType);
            Assert.AreEqual(SeqId, seq.DisplayID);
            Assert.AreEqual(SeqId, seq.ID);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            if (metadata.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType,
                                metadata.Locus.Strand.ToString());
            }
            Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                            metadata.Locus.StrandTopology.ToString().ToUpper(
                                CultureInfo.CurrentCulture));
            Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                            metadata.Locus.Date);
            Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // test the sequence string
            Assert.AreEqual(ExpectedSequence, seq.ToString());
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Sequence");
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                            ExpectedSequence));
        }
Пример #29
0
        public void GenBankFormatterValidateFormatFilePath()
        {
            InitializeXmlVariables();
            ISequenceParser   parserObj = new GenBankParser();
            IList <ISequence> seqList1  = parserObj.Parse(FilePath);

            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
            Sequence orgSeq =
                new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);

            orgSeq.ID           = seqList1[0].ID;
            orgSeq.MoleculeType = seqList1[0].MoleculeType;

            orgSeq.Metadata.Add("GenBank", (GenBankMetadata)seqList1[0].Metadata["GenBank"]);

            ISequenceFormatter formatter = new GenBankFormatter();

            formatter.Format(orgSeq, Constants.GenBankTempFileName);

            // parse
            parserObj = new GenBankParser();
            IList <ISequence> seqList =
                parserObj.Parse(Constants.GenBankTempFileName);

            ISequence seq = seqList[0];

            // test the non-metadata properties
            if (0 == string.Compare(IsSequenceReadOnly, "true",
                                    false, CultureInfo.CurrentCulture))
            {
                Assert.IsTrue(seq.IsReadOnly);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the ReadOnly Property");
            }

            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
            Assert.AreEqual(Utility.GetMoleculeType(MolType), seq.MoleculeType);
            Assert.AreEqual(SeqId, seq.DisplayID);
            Assert.AreEqual(SeqId, seq.ID);
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata =
                (GenBankMetadata)orgSeq.Metadata["GenBank"];

            if (metadata.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType,
                                metadata.Locus.Strand.ToString());
            }
            Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
            Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                            metadata.Locus.Date);
            Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // test the sequence string
            Assert.AreEqual(ExpectedSequence, seq.ToString());
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the Sequence");
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
                                            ExpectedSequence));

            File.Delete(Constants.GenBankTempFileName);
        }
Пример #30
0
        public void TestFeatureItem()
        {
            ISequence     seq;
            ISequence     featureSeq       = null;
            GenBankParser parser           = new GenBankParser();
            string        _genBankDataPath = @"TestUtils\GenBank";

            seq = parser.ParseOne(_genBankDataPath + @"\BK000016-tpa.gbk");
            GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;

            #region Test GetSubSequence Method

            featureSeq = metadata.Features.All[0].GetSubSequence(seq);
            int start = metadata.Features.All[0].Location.Start - 1;
            int end   = metadata.Features.All[0].Location.End - start;
            Assert.AreEqual(featureSeq.ToString(), seq.Range(start, end).ToString());
            featureSeq = metadata.Features.All[1].GetSubSequence(seq);
            start      = metadata.Features.All[1].Location.Start - 1;
            end        = metadata.Features.All[1].Location.End - start;
            Assert.AreEqual(featureSeq.ToString(), seq.Range(start, end).ToString());


            seq = new Sequence(Alphabets.DNA, "ACGTAAAGGT");
            Sequence        refSeq     = new Sequence(Alphabets.DNA, "AAAAATTTT");
            LocationBuilder locbuilder = new LocationBuilder();
            ILocation       loc        = locbuilder.GetLocation("join(complement(4..8),Ref1:5..7)");
            Assert.AreEqual("join(complement(4..8),Ref1:5..7)", locbuilder.GetLocationString(loc));
            FeatureItem fi = new FeatureItem("Feature1", loc);
            Dictionary <string, ISequence> refSeqs = new Dictionary <string, ISequence>();
            refSeqs.Add("Ref1", refSeq);
            ISequence result = fi.GetSubSequence(seq, refSeqs);
            Assert.AreEqual("ATTTCATT", result.ToString());
            #endregion

            #region Test GetSubFeatures Method
            SequenceFeatures seqFeatures = new SequenceFeatures();
            FeatureItem      source      = new FeatureItem("Source", "1..1509");
            FeatureItem      mRNA        = new FeatureItem("mRNA", "join(10..567,789..1320)");
            FeatureItem      cds         = new FeatureItem("CDS", "join(54..567,789..1254)");
            FeatureItem      exon1       = new FeatureItem("Exon", "10..567");
            FeatureItem      intron      = new FeatureItem("Intron", "568..788");
            FeatureItem      exon2       = new FeatureItem("Exon", "789..1320");

            seqFeatures.All.Add(source);
            seqFeatures.All.Add(mRNA);
            seqFeatures.All.Add(cds);
            seqFeatures.All.Add(exon1);
            seqFeatures.All.Add(intron);
            seqFeatures.All.Add(exon2);
            List <FeatureItem> subFeatures = source.GetSubFeatures(seqFeatures);
            Assert.AreEqual(5, subFeatures.Count);
            subFeatures = mRNA.GetSubFeatures(seqFeatures);
            Assert.AreEqual(4, subFeatures.Count);
            subFeatures = cds.GetSubFeatures(seqFeatures);
            Assert.AreEqual(1, subFeatures.Count);
            subFeatures = exon1.GetSubFeatures(seqFeatures);
            Assert.AreEqual(0, subFeatures.Count);
            subFeatures = intron.GetSubFeatures(seqFeatures);
            Assert.AreEqual(0, subFeatures.Count);
            subFeatures = exon2.GetSubFeatures(seqFeatures);
            Assert.AreEqual(0, subFeatures.Count);

            #endregion
        }