コード例 #1
0
        // Parses the single-sequence GenBank test file, verifies the parsed properties,
        // the GenBank metadata and the sequence text, then formats the sequence to a
        // temporary file and compares the result with the expected output (spaces ignored).
        public void TestGenBankWhenParsingOne()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence       seq    = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

            // FirstOrDefault returns null when nothing was parsed; fail with a clear
            // message instead of a NullReferenceException further down.
            Assert.IsNotNull(seq, "The parser returned no sequence.");

            // test the non-metadata properties
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
            Assert.AreEqual("SCU49845", seq.ID);

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            Assert.AreEqual(SequenceStrandType.None, metadata.Locus.Strand);
            Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLowerInvariant());
            Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());

            // 21-JUN-1999, built directly so the expectation does not depend on the
            // month names of the machine's current culture.
            Assert.AreEqual(new DateTime(1999, 6, 21), metadata.Locus.Date);
            Assert.AreEqual("1", metadata.Version.Version);
            Assert.AreEqual("1293613", metadata.Version.GiNumber);

            // test that we're correctly putting all types of metadata in the right places
            Assert.AreEqual(1, seq.Metadata.Count);
            IList<CitationReference> referenceList = metadata.References;

            Assert.AreEqual(3, referenceList.Count);
            IList<FeatureItem> featureList = metadata.Features.All;

            Assert.AreEqual(6, featureList.Count);
            Assert.AreEqual(4, featureList[0].Qualifiers.Count);
            Assert.AreEqual(5, featureList[1].Qualifiers.Count);
            Assert.AreEqual(1, featureList[2].Qualifiers.Count);

            // test the sequence string
            string expected = @"gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcac
aatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttct
ataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc";

            Assert.AreEqual(expected, new string(seq.Select(a => (char)a).ToArray()));

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string             actual;

            try
            {
                formatter.Format(seq, TempGenBankFileName);

                using (StreamReader reader = new StreamReader(TempGenBankFileName))
                {
                    actual = reader.ReadToEnd();
                }
            }
            finally
            {
                // Remove the temporary file even when formatting or reading fails.
                if (File.Exists(TempGenBankFileName))
                {
                    File.Delete(TempGenBankFileName);
                }
            }

            // test the formatting
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine), actual.Replace(" ", ""));
        }
コード例 #2
0
        /// <summary>
        /// Builds a genetic <see cref="Matter"/> whose name joins the name extracted
        /// from the GenBank record and the record's compound accession with " | ".
        /// </summary>
        /// <param name="metadata">
        /// GenBank metadata the matter is created from.
        /// </param>
        /// <returns>
        /// The newly created <see cref="Matter"/>.
        /// </returns>
        public Matter CreateMatterFromGenBankMetadata(GenBankMetadata metadata)
        {
            var result = new Matter();

            result.Name   = string.Format("{0} | {1}", ExtractMatterName(metadata), metadata.Version.CompoundAccession);
            result.Nature = Nature.Genetic;

            // Group and sequence type are filled in by the shared helper.
            FillGroupAndSequenceType(result);

            return result;
        }
コード例 #3
0
ファイル: Program.cs プロジェクト: radtek/GenomeDatabase
        /// <summary>
        /// Executes the <see cref="Metadata"/> query for a GenBank record and then fills
        /// the locus, the version and every feature using the id that query returns.
        /// </summary>
        /// <param name="meta">GenBank metadata whose parts are filled.</param>
        /// <param name="id">Identifier passed to the <see cref="Metadata"/> constructor together with <paramref name="meta"/>.</param>
        private static void fill_metadata(GenBankMetadata meta, long id)
        {
            // The id returned by execute_query is handed to every dependent fill_* call.
            long parentId = new Metadata(meta, id).execute_query(conn);

            fill_locus(meta.Locus, parentId);
            fill_version(meta.Version, parentId);

            foreach (var feature in meta.Features.All)
            {
                fill_feature(feature, parentId);
            }
        }
コード例 #4
0
        // Parses the GenBank file named by FilePath and validates the first sequence
        // (read-only flag, alphabet, molecule type, IDs, locus/version metadata and
        // sequence text) against the expected-value members of this test class
        // (presumably populated by InitializeXmlVariables - confirm).
        public void GenBankParserValidateParseFileName()
        {
            InitializeXmlVariables();

            // parse the file and pick the first sequence
            ISequenceParser   genBankParser   = new GenBankParser();
            IList <ISequence> parsedSequences = genBankParser.Parse(FilePath);
            ISequence         firstSequence   = parsedSequences[0];

            // the read-only check is only performed when IsSequenceReadOnly equals "true"
            bool readOnlyExpected =
                string.Compare(IsSequenceReadOnly, "true", false, CultureInfo.CurrentCulture) == 0;

            if (readOnlyExpected)
            {
                Assert.IsTrue(firstSequence.IsReadOnly);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the ReadOnly Property");
            }

            // non-metadata properties
            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
            Assert.AreEqual(Utility.GetMoleculeType(MolType), firstSequence.MoleculeType);
            Assert.AreEqual(SeqId, firstSequence.DisplayID);
            Assert.AreEqual(SeqId, firstSequence.ID);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // metadata that is tricky to parse and is not covered implicitly by the
            // formatting tests
            GenBankMetadata genBankData = (GenBankMetadata)firstSequence.Metadata["GenBank"];

            // the strand type is only compared when the parsed strand is not None
            if (genBankData.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType, genBankData.Locus.Strand.ToString());
            }

            string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
            string actualTopology   = genBankData.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);

            Assert.AreEqual(expectedTopology, actualTopology);
            Assert.AreEqual(Div, genBankData.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankData.Locus.Date);
            Assert.AreEqual(Version, genBankData.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, genBankData.Version.GINumber);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // sequence text
            Assert.AreEqual(ExpectedSequence, firstSequence.ToString());
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Sequence");

            string sequenceMessage = string.Format((IFormatProvider)null,
                                                   "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                                   ExpectedSequence);

            Console.WriteLine(sequenceMessage);
        }
コード例 #5
0
        // Parses NC_001284.gbk, checks its feature counts, then round-trips the GenBank
        // metadata through BinaryFormatter and checks that the deserialized copy is a
        // distinct object exposing the same feature counts and translation.
        public void TestGenBankFeaturesWithBinaryFormatter()
        {
            // The stream is closed by the using block. There is deliberately no catch-all:
            // a failed assertion or an I/O error must reach the test runner with its own
            // message and stack trace instead of being turned into a bare Assert.Fail().
            using (Stream stream = File.Open("GenbankMetadata.data", FileMode.Create))
            {
                // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data;
                // it is used here only because this test targets binary serialization.
                BinaryFormatter formatter = new BinaryFormatter();
                ISequenceParser parser    = new GenBankParser();
                ISequence       seq       = parser.ParseOne(@"testdata\GenBank\NC_001284.gbk");
                GenBankMetadata metadata  = seq.Metadata["GenBank"] as GenBankMetadata;

                // The 'as' cast yields null when the entry is not GenBank metadata.
                Assert.IsNotNull(metadata);
                AssertNC001284FeatureCounts(metadata);
                Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), metadata.Features.CodingSequences[0].GetTranslation().ToString());

                // Serialize, rewind and read the metadata back from the same stream.
                formatter.Serialize(stream, metadata);
                stream.Seek(0, SeekOrigin.Begin);
                GenBankMetadata deserializedMetadata = (GenBankMetadata)formatter.Deserialize(stream);

                Assert.AreNotSame(metadata, deserializedMetadata);
                AssertNC001284FeatureCounts(deserializedMetadata);
                Assert.AreEqual(deserializedMetadata.Features.CodingSequences[0].Translation.Trim('"'), metadata.Features.CodingSequences[0].GetTranslation().ToString());
            }
        }

        // Asserts the feature counts this test expects for the NC_001284 record.
        private static void AssertNC001284FeatureCounts(GenBankMetadata metadata)
        {
            Assert.AreEqual(743, metadata.Features.All.Count);
            Assert.AreEqual(117, metadata.Features.CodingSequences.Count);
            Assert.AreEqual(32, metadata.Features.Exons.Count);
            Assert.AreEqual(22, metadata.Features.Introns.Count);
            Assert.AreEqual(60, metadata.Features.Genes.Count);
            Assert.AreEqual(455, metadata.Features.MiscFeatures.Count);
            Assert.AreEqual(17, metadata.Features.Promoters.Count);
            Assert.AreEqual(21, metadata.Features.TransferRNAs.Count);
            Assert.AreEqual(117, metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);
            Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
        }
コード例 #6
0
        // Parses the GenBank file named by FilePath through a StreamReader and validates
        // the first sequence (alphabet, ID, locus/version metadata and sequence text)
        // against the expected-value members of this test class.
        public void GenBankParserValidateParseFileNameWithStream()
        {
            InitializeXmlVariables();
            List <ISequence> parsedSequences = null;

            using (ISequenceParser genBankParser = new GenBankParser())
            {
                // ToList() runs before the reader is disposed (Parse is presumably lazy - confirm).
                using (StreamReader input = new StreamReader(FilePath))
                {
                    IEnumerable <ISequence> pending = genBankParser.Parse(input);
                    parsedSequences = pending.ToList();
                }

                ISequence firstSequence = parsedSequences[0];

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
                Assert.AreEqual(SeqId, firstSequence.ID);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // metadata that is tricky to parse and is not covered implicitly by the
                // formatting tests
                GenBankMetadata genBankData = (GenBankMetadata)firstSequence.Metadata["GenBank"];

                // the strand type is only compared when the parsed strand is not None
                if (genBankData.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType, genBankData.Locus.Strand.ToString());
                }

                string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
                string actualTopology   = genBankData.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);

                Assert.AreEqual(expectedTopology, actualTopology);
                Assert.AreEqual(Div, genBankData.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankData.Locus.Date);

                Assert.AreEqual(Version, genBankData.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, genBankData.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // sequence text, rebuilt from the sequence's items cast to char
                char[] symbols = firstSequence.Select(a => (char)a).ToArray();

                Assert.AreEqual(ExpectedSequence, new string(symbols));

                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Sequence");

                string sequenceMessage = string.Format((IFormatProvider)null,
                                                       "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                                       ExpectedSequence);

                Console.WriteLine(sequenceMessage);
            }
        }
コード例 #7
0
        // Parses the single-sequence GenBank test file with ParseOne, verifies the parsed
        // properties, the GenBank metadata and the sequence text, then formats the sequence
        // to a string and compares it with the expected output (spaces ignored).
        public void TestGenBankWhenParsingOne()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence       seq    = parser.ParseOne(_singleProteinSeqGenBankFilename);

            // test the non-metadata properties
            Assert.IsTrue(seq.IsReadOnly);
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
            Assert.AreEqual(MoleculeType.DNA, seq.MoleculeType);
            Assert.AreEqual("SCU49845", seq.DisplayID);
            Assert.AreEqual("SCU49845", seq.ID);

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            Assert.AreEqual(SequenceStrandType.None, metadata.Locus.Strand);
            Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLowerInvariant());
            Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());

            // 21-JUN-1999, built directly so the expectation does not depend on the
            // month names of the machine's current culture.
            Assert.AreEqual(new DateTime(1999, 6, 21), metadata.Locus.Date);
            Assert.AreEqual("1", metadata.Version.Version);
            Assert.AreEqual("1293613", metadata.Version.GINumber);

            // test that we're correctly putting all types of metadata in the right places
            Assert.AreEqual(1, seq.Metadata.Count);
            IList<CitationReference> referenceList = metadata.References;

            Assert.AreEqual(3, referenceList.Count);
            IList<FeatureItem> featureList = metadata.Features.All;

            Assert.AreEqual(6, featureList.Count);
            Assert.AreEqual(4, featureList[0].Qualifiers.Count);
            Assert.AreEqual(5, featureList[1].Qualifiers.Count);
            Assert.AreEqual(1, featureList[2].Qualifiers.Count);

            // test the sequence string
            string expected = @"GATCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCCGACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCTGCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGAACCGCCAATAGACAACATATGTAACATATTTAGGATATACCTCGAAAATAATAAACCGCCACACTGTCATTATTATAATTAGAAACAGAACGCAAAAATTATCCACTATATAATTCAAAGACGCGAAAAAAAAAGAACAACGCGTCATAGAACTTTTGGCAATTCGCGTCACAAATAAATTTTGGCAACTTATGTTTCCTCTTCGAGCAGTACTCGAGCCCTGTCTCAAGAATGTAATAATACCCATCGTAGGTATGGTTAAAGATAGCATCTCCACAACCTCAAAGCTCCTTGCCGAGAGTCGCCCTCCTTTGTCGAGTAATTTTCACTTTTCATATGAGAACTTATTTTCTTATTCTTTACTCTCACATCCTGTAGTGATTGACACTGCAACAGCCACCATCACTAGAAGAACAGAACAATTACTTAATAGAAAAATTATATCTTCCTCGAAACGATTTCCTGCTTCCAACATCTACGTATATCAAGAAGCATTCACTTACCATGACACAGCTTCAGATTTCATTATTGCTGACAGCTACTATATCACTACTCCATCTAGTAGTGGCCACGCCCTATGAGGCATATCCTATCGGAAAACAATACCCCCCAGTGGCAAGAGTCAATGAATCGTTTACATTTCAAATTTCCAATGATACCTATAAATCGTCTGTAGACAAGACAGCTCAAATAACATACAATTGCTTCGACTTACCGAGCTGGCTTTCGTTTGACTCTAGTTCTAGAACGTTCTCAGGTGAACCTTCTTCTGACTTACTATCTGATGCGAACACCACGTTGTATTTCAATGTAATACTCGAGGGTACGGACTCTGCCGACAGCACGTCTTTGAACAATACATACCAATTTGTTGTTACAAACCGTCCATCCATCTCGCTATCGTCAGATTTCAATCTATTGGCGTTGTTAAAAAACTATGGTTATACTAACGGCAAAAACGCTCTGAAACTAGATCCTAATGAAGTCTTCAACGTGACTTTTGACCGTTCAATGTTCACTAACGAAGAATCCATTGTGTCGTATTACGGACGTTCTCAGTTGTATAATGCGCCGTTACCCAATTGGCTGTTCTTCGATTCTGGCGAGTTGAAGTTTACTGGGACGGCACCGGTGATAAACTCGGCGATTGCTCCAGAAACAAGCTACAGTTTTGTCATCATCGCTACAGACATTGAAGGATTTTCTGCCGTTGAGGTAGAATTCGAATTAGTCATCGGGGCTCACCAGTTAACTACCTCTATTCAAAATAGTTTGATAATCAACGTTACTGACACAGGTAACGTTTCATATGACTTACCTCTAAACTATGTTTATCTCGATGACGATCCTATTTCTTCTGATAAATTGGGTTCTATAAACTTATTGGATGCTCCAGACTGGGTGGCATTAGATAATGCTACCATTTCCGGGTCTGTCCCAGATGAATTACTCGGTAAGAACTCCAATCCTGCCAATTTTTCTGTGTCCATTTATGATACTTATGGTGATGTGATTTATTTCAACTTCGAAGTTGTCTCCACAACGGATTTGTTTGCCATTAGTTCTCTTCCCAATATTAACGCTACAAGGGGTGAATGGTTCTCCTACTATTTTTTGCCTTCTCAGTTTACAGACTACGTGAATACAAACGTTTCATTAGAGTTTACTAATTCAAGCCAAGACCATGACTGGGTGAAATTCCAATCATCTAATTTAACATTAGCTGGAGAAGTGCCCAAGAATTTCGACAAGCTTTCATTAGGTTTGAAAGCGAACCAAGGTTCAC
AATCTCAAGAGCTATATTTTAACATCATTGGCATGGATTCAAAGATAACTCACTCAAACCACAGTGCGAATGCAACGTCCACAAGAAGTTCTCACCACTCCACCTCAACAAGTTCTTACACATCTTCTACTTACACTGCAAAAATTTCTTCTACCTCCGCTGCTGCTACTTCTTCTGCTCCAGCAGCGCTGCCAGCAGCCAATAAAACTTCATCTCACAATAAAAAAGCAGTAGCAATTGCGTGCGGTGTTGCTATCCCATTAGGCGTTATCCTAGTAGCTCTCATTTGCTTCCTAATATTCTGGAGACGCAGAAGGGAAAATCCAGACGATGAAAACTTACCGCATGCTATTAGTGGACCTGATTTGAATAATCCTGCAAATAAACCAAATCAAGAAAACGCTACACCTTTGAACAACCCCTTTGATGATGATGCTTCCTCGTACGATGATACTTCAATAGCAAGAAGATTGGCTGCTTTGAACACTTTGAAATTGGATAACCACTCTGCCACTGAATCTGATATTTCCAGCGTGGATGAAAAGAGAGATTCTCTATCAGGTATGAATACATACAATGATCAGTTCCAATCCCAAAGTAAAGAAGAATTATTAGCAAAACCCCCAGTACAGCCTCCAGAGAGCCCGTTCTTTGACCCACAGAATAGGTCTTCTTCTGTGTATATGGATAGTGAACCAGCAGTAAATAAATCCTGGCGATATACTGGCAACCTGTCACCAGTCTCTGATATTGTCAGAGACAGTTACGGATCACAAAAAACTGTTGATACAGAAAAACTTTTCGATTTAGAAGCACCAGAGAAGGAAAAACGTACGTCAAGGGATGTCACTATGTCTTCACTGGACCCTTGGAACAGCAATATTAGCCCTTCTCCCGTAAGAAAATCAGTAACACCATCACCATATAACGTAACGAAGCATCGTAACCGCCACTTACAAAATATTCAAGACTCTCAAAGCGGTAAAAACGGAATCACTCCCACAACAATGTCAACTTCATCTTCTGACGATTTTGTTCCGGTTAAAGATGGTGAAAATTTTTGCTGGGTCCATAGCATGGAACCAGACAGAAGACCAAGTAAGAAAAGGTTAGTAGATTTTTCAAATAAGAGTAATGTCAATGTTGGTCAAGTTAAGGACATTCACGGACGCATCCCAGAAATGCTGTGATTATACGCAACGATATTTTGCTTAATTTTATTTTCCTGTTTTATTTTTTATTAGTGGTTTACAGATACCCTATATTTTATTTAGTTTTTATACTTAGAGACATTTAATTTTAATTCCATTCTTCAAATTTCATTTTTGCACTTAAAACAAAGATCCAAAAATGCTCTCGCCCTCTTCATATTGAGAATACACTCCATTCAAAATTTTGTCGTCACCGCTGATTAATTTTTCACTAAACTGATGAATAATCAAAGGCCCCACGTCAGAACCGACTAAAGAAGTGAGTTTTATTTTAGGAGGTTGAAAACCATTATTGTCTGGTAAATTTTCATCTTCTTGACATTTAACCCAGTTTGAATCCCTTTCAATTTCTGCTTTTTCCTCCAAACTATCGACCCTCCTGTTTCTGTCCAACTTATGTCCTAGTTCCAATTCGATCGCATTAATAACTGCTTCAAATGTTATTGTGTCATCGTTGACTTTAGGTAATTTCTCCAAATGCATAATCAAACTATTTAAGGAAGATCGGAATTCGTCGAACACTTCAGTTTCCGTAATGATCTGATCGTCTTTATCCACATGTTGTAATTCACTAAAATCTAAAACGTATTTTTCAATGCATAAATCGTTCTTTTTATTAATAATGCAGATGGAAAATCTGTAAACGTGCGTTAATTTAGAAAGAACATCCAGTATAAGTTCTTCTATATAGTCAATTAAAGCAGGATGCCTATTAATGGGAACGAACTGCGGCAAGTTGAATGACTGGTAAGTAGTGTAGTCGAATGACTGAGGTGGGTATACATTTCT
ATAAAATAAAATCAAATTAATGTAGCATTTTAAGTATACCCTCAGCCACTTCTCTACCCATCTATTCATAAAGCTGACGCAACGATTACTATTTTTTTTTTCTTCTTGGATCTCAGTCGTCGCAAAAACGTATACCTTCTTTTTCCGACCTTTTTTTTAGCTTTCTGGAAAAGTTTATATTAGTTAAACAGGGTCTAGTCTTAGTGTGAAAGCTAGTGGTTTCGATTGACTGATATTAAGAAAGTGGAAATTAAATTAGTAGTGTAGACGTATATGCATATGTATTTCTCGCCTGTTTATGTTTCTACGTACTTTTGATTTATAGCAAGGGGAAAAGAAATACATACTATTTTTTGGTAAAGGTGAAAGCATAATGTAAAAGCTAGAATAAAATGGACGAAATAAAGAGAGGCTTAGTTCATCTTTTTTCCAAAAAGCACCCAATGATAATAACTAAAATGAAAAGGATTTGCCATCTGTCAGCAACATCAGTTGTGTGAGCAATAATAAAATCATCACCTCCGTTGCCTTTAGCGCGTTTGTCGTTTGTATCTTCCGTAATTTTAGTCTTATCAATGGGAATCATAAATTTTCCAATGAATTAGCAATTTCGTCCAATTCTTTTTGAGCTTCTTCATATTTGCTTTGGAATTCTTCGCACTTCTTTTCCCATTCATCTCTTTCTTCTTCCAAAGCAACGATCCTTCTACCCATTTGCTCAGAGTTCAAATCGGCCTCTTTCAGTTTATCCATTGCTTCCTTCAGTTTGGCTTCACTGTCTTCTAGCTGTTGTTCTAGATCCTGGTTTTTCTTGGTGTAGTTCTCATTATTAGATCTCAAGTTATTGGAGTCTTCAGCCAATTGCTTTGTATCAGACAATTGACTCTCTAACTTCTCCACTTCACTGTCGAGTTGCTCGTTTTTAGCGGACAAAGATTTAATCTCGTTTTCTTTTTCAGTGTTAGATTGCTCTAATTCTTTGAGCTGTTCTCTCAGCTCCTCATATTTTTCTTGCCATGACTCAGATTCTAATTTTAAGCTATTCAATTTCTCTTTGATC";

            Assert.AreEqual(expected, seq.ToString());

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string             actual    = formatter.FormatString(seq);

            // test the formatting
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
        }
コード例 #8
0
        // Checks the feature accessors of GenBankMetadata against two GenBank files:
        // the single-sequence test file and NC_001284.gbk. Each parser is wrapped in a
        // using block so the underlying file is released even when an assertion fails.
        public void GenBankFeatures()
        {
            GenBankMetadata metadata;

            // first file: coding sequences and location-based feature lookups
            using (ISequenceParser parser = new GenBankParser(_singleProteinSeqGenBankFilename))
            {
                ISequence seq = parser.Parse().FirstOrDefault();

                Assert.IsNotNull(seq, "The parser returned no sequence.");
                metadata = seq.Metadata["GenBank"] as GenBankMetadata;
                Assert.IsNotNull(metadata);

                List<CodingSequence> codingSequences = metadata.Features.CodingSequences;

                Assert.AreEqual(3, codingSequences.Count);
                Assert.AreEqual(1, codingSequences[0].DatabaseCrossReference.Count);
                Assert.AreEqual(string.Empty, codingSequences[0].GeneSymbol);
                Assert.AreEqual(1, metadata.Features.GetFeatures("source").Count);
                Assert.IsFalse(codingSequences[0].Pseudo);
                Assert.AreEqual(2, metadata.GetFeatures(1, 109).Count);
                Assert.AreEqual(2, metadata.GetFeatures(1, 10).Count);
                Assert.AreEqual(2, metadata.GetFeatures(10, 100).Count);
                Assert.AreEqual(2, metadata.GetFeatures(120, 150).Count);
                Assert.AreEqual(0, metadata.GetCitationsReferredInFeatures().Count);
                parser.Close();
            }

            // second file: feature counts per category and translation of the first CDS
            using (ISequenceParser parser1 = new GenBankParser(_genBankDataPath + @"\NC_001284.gbk"))
            {
                ISequence seq1 = parser1.Parse().FirstOrDefault();

                Assert.IsNotNull(seq1, "The parser returned no sequence.");
                metadata = seq1.Metadata["GenBank"] as GenBankMetadata;
                Assert.IsNotNull(metadata);

                Assert.AreEqual(743, metadata.Features.All.Count);
                Assert.AreEqual(117, metadata.Features.CodingSequences.Count);
                Assert.AreEqual(32, metadata.Features.Exons.Count);
                Assert.AreEqual(22, metadata.Features.Introns.Count);
                Assert.AreEqual(60, metadata.Features.Genes.Count);
                Assert.AreEqual(455, metadata.Features.MiscFeatures.Count);
                Assert.AreEqual(17, metadata.Features.Promoters.Count);
                Assert.AreEqual(21, metadata.Features.TransferRNAs.Count);
                Assert.AreEqual(117, metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);
                Assert.AreEqual(117, metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count);

                // Decode the translated sequence's bytes as ASCII text for comparison
                // with the Translation qualifier (surrounding quotes stripped).
                ISequence seqTemp          = metadata.Features.CodingSequences[0].GetTranslation();
                string    sequenceInString = Encoding.ASCII.GetString(seqTemp.ToArray());

                Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), sequenceInString.Trim('"'));
                Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
            }
        }
コード例 #9
0
        /// <summary>
        /// Asserts that deserialized GenBank metadata carries the same keyword, locus,
        /// origin, primary, project, reference, segment, source and version values as
        /// the metadata it was produced from.
        /// </summary>
        /// <param name="deserializedMetadta">Deserialized metadata (the actual values).</param>
        /// <param name="metadata">Original metadata (the expected values).</param>
        private static void ValidateDeserializedMetadata(GenBankMetadata deserializedMetadta,
                                                         GenBankMetadata metadata)
        {
            Assert.AreEqual(metadata.Keywords, deserializedMetadta.Keywords);
            Assert.AreEqual(metadata.Locus.Date, deserializedMetadta.Locus.Date);
            Assert.AreEqual(metadata.Locus.DivisionCode, deserializedMetadta.Locus.DivisionCode);
            Assert.AreEqual(metadata.Locus.MoleculeType, deserializedMetadta.Locus.MoleculeType);
            Assert.AreEqual(metadata.Locus.Name, deserializedMetadta.Locus.Name);
            Assert.AreEqual(metadata.Locus.SequenceLength, deserializedMetadta.Locus.SequenceLength);
            Assert.AreEqual(metadata.Locus.SequenceType, deserializedMetadta.Locus.SequenceType);
            Assert.AreEqual(metadata.Locus.Strand, deserializedMetadta.Locus.Strand);
            Assert.AreEqual(metadata.Locus.StrandTopology, deserializedMetadta.Locus.StrandTopology);

            // Compare against the original's Origin; comparing the deserialized value
            // with itself would always pass.
            Assert.AreEqual(metadata.Origin, deserializedMetadta.Origin);
            Assert.AreEqual(metadata.Primary, deserializedMetadta.Primary);
            Assert.AreEqual(metadata.Project.Name, deserializedMetadta.Project.Name);

            // The counts are checked first so that missing or extra entries are detected
            // rather than silently skipped by the element-wise loops.
            Assert.AreEqual(metadata.Project.Numbers.Count, deserializedMetadta.Project.Numbers.Count);
            for (int i = 0; i < metadata.Project.Numbers.Count; i++)
            {
                Assert.AreEqual(metadata.Project.Numbers[i], deserializedMetadta.Project.Numbers[i]);
            }

            Assert.AreEqual(metadata.References.Count, deserializedMetadta.References.Count);
            for (int i = 0; i < metadata.References.Count; i++)
            {
                Assert.AreEqual(metadata.References[i].Authors, deserializedMetadta.References[i].Authors);
                Assert.AreEqual(metadata.References[i].Consortiums, deserializedMetadta.References[i].Consortiums);
                Assert.AreEqual(metadata.References[i].Journal, deserializedMetadta.References[i].Journal);
                Assert.AreEqual(metadata.References[i].Location, deserializedMetadta.References[i].Location);
                Assert.AreEqual(metadata.References[i].Medline, deserializedMetadta.References[i].Medline);
                Assert.AreEqual(metadata.References[i].Number, deserializedMetadta.References[i].Number);
                Assert.AreEqual(metadata.References[i].PubMed, deserializedMetadta.References[i].PubMed);
                Assert.AreEqual(metadata.References[i].Remarks, deserializedMetadta.References[i].Remarks);
                Assert.AreEqual(metadata.References[i].Title, deserializedMetadta.References[i].Title);
            }

            Assert.AreEqual(metadata.Segment.Current, deserializedMetadta.Segment.Current);
            Assert.AreEqual(metadata.Segment.Count, deserializedMetadta.Segment.Count);
            Assert.AreEqual(metadata.Source.CommonName, deserializedMetadta.Source.CommonName);
            Assert.AreEqual(metadata.Source.Organism.ClassLevels, deserializedMetadta.Source.Organism.ClassLevels);
            Assert.AreEqual(metadata.Source.Organism.Genus, deserializedMetadta.Source.Organism.Genus);
            Assert.AreEqual(metadata.Source.Organism.Species, deserializedMetadta.Source.Organism.Species);
            Assert.AreEqual(metadata.Version.Accession, deserializedMetadta.Version.Accession);
            Assert.AreEqual(metadata.Version.CompoundAccession, deserializedMetadta.Version.CompoundAccession);
            Assert.AreEqual(metadata.Version.GINumber, deserializedMetadta.Version.GINumber);
            Assert.AreEqual(metadata.Version.Version, deserializedMetadta.Version.Version);
        }
コード例 #10
0
        /// <summary>
        /// Opens a GenBank file, takes its first sequence as the project fragment and
        /// rebuilds <c>FragmentDict</c> with one fragment per misc feature, keyed by the
        /// feature's standard name.
        /// </summary>
        /// <param name="file">Path of the GenBank file to open.</param>
        public void openProject(String file)
        {
            Sequence sequence;

            parser = SequenceParsers.GenBank;
            parser.Open(file);
            try
            {
                sequence = (Sequence)parser.Parse().ToList()[0];
            }
            finally
            {
                // Close the parser even when parsing or the cast throws, so the file
                // is not left open.
                parser.Close();
            }

            Fragment        project = new Fragment(file, "project", sequence);
            GenBankMetadata meta    = sequence.Metadata["GenBank"] as GenBankMetadata;

            FragmentDict = new Dictionary <string, Fragment>();
            foreach (var feat in meta.Features.MiscFeatures)
            {
                // LocationStart/LocationEnd are treated as 1-based and inclusive:
                // shift the start by one and take (end - start + 1) characters.
                String subseq = project.GetString().Substring(feat.Location.LocationStart - 1, feat.Location.LocationEnd - feat.Location.LocationStart + 1);
                FragmentDict.Add(feat.StandardName, new Fragment(file, feat.StandardName, new Sequence(Alphabets.DNA, subseq)));
            }
        }
コード例 #11
0
        /// <summary>
        /// Derives the most likely sequence name from the species, common name and
        /// definition fields of the metadata.
        /// </summary>
        /// <param name="metadata">
        /// GenBank metadata to read the name fields from.
        /// </param>
        /// <returns>
        /// The chosen name as <see cref="string"/>.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when neither the common name nor the species covers the other.
        /// </exception>
        private static string ExtractMatterName(GenBankMetadata metadata)
        {
            string species    = metadata.Source.Organism.Species.GetLargestRepeatingSubstring();
            string commonName = metadata.Source.CommonName;

            // Strip the known trailing phrases from the definition, in this fixed order.
            string definition = metadata.Definition;
            string[] trailingPhrases =
            {
                ", complete genome.",
                ", complete sequence.",
                ", complete CDS.",
                ", complete cds.",
                ", genome."
            };

            foreach (string phrase in trailingPhrases)
            {
                definition = definition.TrimEnd(phrase);
            }

            // Prefer the common name when it covers the species.
            if (commonName.Contains(species) || species.IsSubsetOf(commonName))
            {
                return ChooseNameOrDefinition(commonName, definition);
            }

            // Otherwise use the species when it covers the common name.
            if (species.Contains(commonName) || commonName.IsSubsetOf(species))
            {
                return ChooseNameOrDefinition(species, definition);
            }

            throw new Exception($"Sequences names are not equal. CommonName = {commonName}, Species = {species}, Definition = {definition}");
        }

        /// <summary>
        /// Picks between a name and a definition: the definition when it covers the name,
        /// the name when it covers the definition, otherwise both joined with " | ".
        /// </summary>
        /// <param name="name">Common name or species selected by the caller.</param>
        /// <param name="definition">Definition with the trailing phrases already removed.</param>
        /// <returns>The chosen or combined name.</returns>
        private static string ChooseNameOrDefinition(string name, string definition)
        {
            if (definition.Contains(name) || name.IsSubsetOf(definition))
            {
                return definition;
            }

            if (name.Contains(definition) || definition.IsSubsetOf(name))
            {
                return name;
            }

            return $"{name} | {definition}";
        }
コード例 #12
0
        /// <summary>
        /// Scans the BLAST report for the given query and returns the position of the first
        /// hit/HSP pair that passes the configured filters and whose GenBank record yields
        /// a best feature item.
        /// </summary>
        /// <param name="Up">Project settings: project directory and BLAST filter thresholds.</param>
        /// <param name="seqPos">Number of the query; selects the report file "xml\{seqPos}.xml".</param>
        /// <returns>
        /// A two-element array { hit index, HSP index }, or { -1, -1 } when no pair qualifies.
        /// </returns>
        /// <exception cref="FileNotFoundException">Thrown when the BLAST report file does not exist.</exception>
        public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
        {
            // BLAST reports are saved in individual files by query and
            // numbered in the same order as they appear in the input FASTA file.
            int[] annotatedIndex = { -1, -1 };

            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";

            if (!File.Exists(blastFile))
            {
                throw new FileNotFoundException("File does not exist.", blastFile);
            }

            BlastXmlParser      blastParser  = new BlastXmlParser();
            IList <BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser       gbParser     = new GenBankParser();

            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    // Counts the HSPs of this record that passed the filters and were examined.
                    int hitsProcessed = 0;
                    int hitCount      = record.Hits.Count();

                    for (int i = 0; i < hitCount; i++)
                    {
                        Hit blastHit = record.Hits[i];
                        int hspCount = blastHit.Hsps.Count();

                        for (int j = 0; j < hspCount; j++)
                        {
                            Hsp    blastHsp      = blastHit.Hsps[j];
                            double percentId     = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                            double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                            bool passesFilters = (percentId >= Up.BlastMinPercentIdentity) &&
                                                 (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                                 (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                                 (hitsProcessed < Up.BlastMaxNumHits);
                            if (!passesFilters)
                            {
                                continue;
                            }

                            // The second '|'-separated field of the hit id is parsed as the numeric id.
                            long        gi     = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                            GenBankItem gitem  = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                            string      gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                            gbFile += "_" + gitem.HitStart.ToString();
                            gbFile += "_" + gitem.HitEnd.ToString();
                            gbFile += ".gb";

                            try
                            {
                                Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                ISequence           gbRecord = gbParser.ParseOne(gbFile);
                                GenBankMetadata     gbMeta   = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                IList <FeatureItem> features = gbMeta.Features.All;
                                FeatureItem         bestItem = getBestFeatureItem(features);
                                if (bestItem != null)
                                {
                                    annotatedIndex[0] = i;
                                    annotatedIndex[1] = j;
                                    return(annotatedIndex);
                                }
                            }
                            catch (Exception ex)
                            {
                                // Best effort: a GenBank record that cannot be read is skipped,
                                // but the reason is logged instead of being silently dropped.
                                Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString() + " (" + ex.Message + ")");
                            }

                            hitsProcessed += 1;
                        }
                    }
                }
            }

            return(annotatedIndex);
        }
コード例 #13
0
        /// <summary>
        /// Checks FeatureItem.GetSubSequence against ranges taken from a parsed GenBank file
        /// and from a hand-built location, then checks FeatureItem.GetSubFeatures counts.
        /// </summary>
        public void TestFeatureItem()
        {
            GenBankParser parser          = new GenBankParser();
            const string  genBankDataPath = @"TestUtils\GenBank";

            ISequence seq      = parser.ParseOne(genBankDataPath + @"\BK000016-tpa.gbk");
            var       metadata = seq.Metadata["GenBank"] as GenBankMetadata;

            #region Test GetSubSequence Method

            // The first two parsed features must match the equivalent range of the sequence.
            for (int featureIndex = 0; featureIndex < 2; featureIndex++)
            {
                ISequence featureSeq = metadata.Features.All[featureIndex].GetSubSequence(seq);
                int       offset     = metadata.Features.All[featureIndex].Location.Start - 1;
                int       length     = metadata.Features.All[featureIndex].Location.End - offset;
                Assert.AreEqual(featureSeq.ToString(), seq.Range(offset, length).ToString());
            }

            // A location that joins a complemented local range with a range of a referenced sequence.
            const string locationText = "join(complement(4..8),Ref1:5..7)";

            seq = new Sequence(Alphabets.DNA, "ACGTAAAGGT");
            Sequence        refSeq     = new Sequence(Alphabets.DNA, "AAAAATTTT");
            LocationBuilder locbuilder = new LocationBuilder();
            ILocation       loc        = locbuilder.GetLocation(locationText);
            Assert.AreEqual(locationText, locbuilder.GetLocationString(loc));

            FeatureItem fi      = new FeatureItem("Feature1", loc);
            var         refSeqs = new Dictionary <string, ISequence> { { "Ref1", refSeq } };
            ISequence   result  = fi.GetSubSequence(seq, refSeqs);
            Assert.AreEqual("ATTTCATT", result.ToString());
            #endregion

            #region Test GetSubFeatures Method
            SequenceFeatures seqFeatures = new SequenceFeatures();
            FeatureItem      source      = new FeatureItem("Source", "1..1509");
            FeatureItem      mRNA        = new FeatureItem("mRNA", "join(10..567,789..1320)");
            FeatureItem      cds         = new FeatureItem("CDS", "join(54..567,789..1254)");
            FeatureItem      exon1       = new FeatureItem("Exon", "10..567");
            FeatureItem      intron      = new FeatureItem("Intron", "568..788");
            FeatureItem      exon2       = new FeatureItem("Exon", "789..1320");

            // Registration order and the expected sub-feature count for each feature.
            FeatureItem[] allFeatures    = { source, mRNA, cds, exon1, intron, exon2 };
            int[]         expectedCounts = { 5, 4, 1, 0, 0, 0 };

            foreach (FeatureItem item in allFeatures)
            {
                seqFeatures.All.Add(item);
            }

            for (int k = 0; k < allFeatures.Length; k++)
            {
                List <FeatureItem> subFeatures = allFeatures[k].GetSubFeatures(seqFeatures);
                Assert.AreEqual(expectedCounts[k], subFeatures.Count);
            }

            #endregion
        }
コード例 #14
0
        /// <summary>
        /// Builds a fully populated GenBankMetadata, clones it and verifies that every
        /// populated field and collection of the clone matches the original.
        /// </summary>
        public void GenBankMetadataClone()
        {
            LocationBuilder locBuilder = new LocationBuilder();
            GenBankMetadata metadata   = new GenBankMetadata();

            // ---- Populate the original metadata ----
            metadata.Accession         = new GenBankAccession();
            metadata.Accession.Primary = "PAccession";
            metadata.Accession.Secondary.Add("SAccession1");
            metadata.Accession.Secondary.Add("SAccession2");
            metadata.BaseCount = "a 1 c 2";
            metadata.Comments.Add("Comment1");
            metadata.Comments.Add("Comment2");
            metadata.Contig      = "Contig Info";
            metadata.DBLink      = new CrossReferenceLink();
            metadata.DBLink.Type = CrossReferenceType.Project;
            metadata.DBLink.Numbers.Add("100");
            metadata.DBLink.Numbers.Add("200");
            metadata.DBSource   = "DbSourceInfo";
            metadata.Definition = "Defination info";
            metadata.Features   = new SequenceFeatures();
            FeatureItem   feature         = new FeatureItem("feature1", "1");
            List <string> qualifierValues = new List <string>();

            qualifierValues.Add("qualifier1value1");
            qualifierValues.Add("qualifier1value2");
            feature.Qualifiers.Add("qualifier1", qualifierValues);
            metadata.Features.All.Add(feature);

            feature         = new FeatureItem("feature2", "2");
            qualifierValues = new List <string>();
            qualifierValues.Add("qualifier2value1");
            qualifierValues.Add("qualifier2value2");
            feature.Qualifiers.Add("qualifier2", qualifierValues);
            metadata.Features.All.Add(feature);
            feature         = new FeatureItem("feature2", "2");
            qualifierValues = new List <string>();
            qualifierValues.Add("qualifier2value1");
            qualifierValues.Add("qualifier2value2");
            feature.Qualifiers.Add("qualifier2", qualifierValues);
            metadata.Features.All.Add(feature);

            metadata.Keywords             = "keywords data";
            metadata.Locus                = new GenBankLocusInfo();
            metadata.Locus.Date           = DateTime.Now;
            metadata.Locus.DivisionCode   = SequenceDivisionCode.CON;
            metadata.Locus.MoleculeType   = MoleculeType.DNA;
            metadata.Locus.Name           = "LocusName";
            metadata.Locus.SequenceLength = 100;
            metadata.Locus.SequenceType   = "bp";
            metadata.Locus.Strand         = SequenceStrandType.Double;
            metadata.Locus.StrandTopology = SequenceStrandTopology.Linear;
            metadata.Origin               = "origin info";
            metadata.Primary              = "Primary info";
            metadata.Project              = new ProjectIdentifier();
            metadata.Project.Name         = "Project1";
            metadata.Project.Numbers.Add("101");
            metadata.Project.Numbers.Add("201");
            CitationReference reference = new CitationReference();

            reference.Authors     = "Authors";
            reference.Consortiums = "Consortiums";
            reference.Journal     = "Journal";
            reference.Location    = "3";
            reference.Medline     = "Medline info";
            reference.Number      = 1;
            reference.PubMed      = "pubmid";
            reference.Remarks     = "remarks";
            reference.Title       = "Title of the book";
            metadata.References.Add(reference);
            reference             = new CitationReference();
            reference.Authors     = "Authors";
            reference.Consortiums = "Consortiums";
            reference.Journal     = "Journal";
            reference.Location    = "4";
            reference.Medline     = "Medline info";
            reference.Number      = 2;
            reference.PubMed      = "pubmid";
            reference.Remarks     = "remarks";
            reference.Title       = "Title of the book";
            metadata.References.Add(reference);
            metadata.Segment                     = new SequenceSegment();
            metadata.Segment.Count               = 2;
            metadata.Segment.Current             = 1;
            metadata.Source                      = new SequenceSource();
            metadata.Source.CommonName           = "ABC Xyz";
            metadata.Source.Organism.Genus       = "ABC";
            metadata.Source.Organism.Species     = "Xyz";
            metadata.Source.Organism.ClassLevels = "123 123";
            metadata.Version                     = new GenBankVersion();
            metadata.Version.Accession           = "PAccession";
            metadata.Version.Version             = "1";
            metadata.Version.GINumber            = "12345";

            // ---- Clone and compare ----
            GenBankMetadata clonemetadta = metadata.Clone();

            Assert.AreEqual(clonemetadta.Accession.Primary, metadata.Accession.Primary);

            // Each loop below is bounded by the clone's count, so the counts are compared
            // first; otherwise a clone that dropped items would pass without any check.
            Assert.AreEqual(clonemetadta.Accession.Secondary.Count, metadata.Accession.Secondary.Count);
            for (int i = 0; i < clonemetadta.Accession.Secondary.Count; i++)
            {
                Assert.AreEqual(clonemetadta.Accession.Secondary[i], metadata.Accession.Secondary[i]);
            }

            Assert.AreEqual(clonemetadta.BaseCount, metadata.BaseCount);

            Assert.AreEqual(clonemetadta.Comments.Count, metadata.Comments.Count);
            for (int i = 0; i < clonemetadta.Comments.Count; i++)
            {
                Assert.AreEqual(clonemetadta.Comments[i], metadata.Comments[i]);
            }

            Assert.AreEqual(clonemetadta.Contig, metadata.Contig);

            Assert.AreEqual(clonemetadta.DBLink.Type, metadata.DBLink.Type);

            Assert.AreEqual(clonemetadta.DBLink.Numbers.Count, metadata.DBLink.Numbers.Count);
            for (int i = 0; i < clonemetadta.DBLink.Numbers.Count; i++)
            {
                Assert.AreEqual(clonemetadta.DBLink.Numbers[i], metadata.DBLink.Numbers[i]);
            }

            Assert.AreEqual(clonemetadta.DBSource, metadata.DBSource);
            Assert.AreEqual(clonemetadta.Definition, metadata.Definition);

            Assert.AreEqual(clonemetadta.Features.All.Count, metadata.Features.All.Count);
            for (int i = 0; i < clonemetadta.Features.All.Count; i++)
            {
                Assert.AreEqual(clonemetadta.Features.All[i].Key, metadata.Features.All[i].Key);
                Assert.AreEqual(locBuilder.GetLocationString(clonemetadta.Features.All[i].Location), locBuilder.GetLocationString(metadata.Features.All[i].Location));

                Assert.AreEqual(clonemetadta.Features.All[i].Qualifiers.Count, metadata.Features.All[i].Qualifiers.Count);
                foreach (KeyValuePair <string, List <string> > kvp in clonemetadta.Features.All[i].Qualifiers)
                {
                    if (metadata.Features.All[i].Qualifiers.ContainsKey(kvp.Key))
                    {
                        if (kvp.Value == null)
                        {
                            Assert.IsNull(metadata.Features.All[i].Qualifiers[kvp.Key]);
                        }
                        else
                        {
                            Assert.AreEqual(kvp.Value.Count, metadata.Features.All[i].Qualifiers[kvp.Key].Count);
                            for (int j = 0; j < kvp.Value.Count; j++)
                            {
                                Assert.AreEqual(kvp.Value[j], metadata.Features.All[i].Qualifiers[kvp.Key][j]);
                            }
                        }
                    }
                    else
                    {
                        // The clone has a qualifier key that the original does not.
                        Assert.Fail();
                    }
                }
            }

            Assert.AreEqual(clonemetadta.Keywords, metadata.Keywords);
            Assert.AreEqual(clonemetadta.Locus.Date, metadata.Locus.Date);
            Assert.AreEqual(clonemetadta.Locus.DivisionCode, metadata.Locus.DivisionCode);
            Assert.AreEqual(clonemetadta.Locus.MoleculeType, metadata.Locus.MoleculeType);
            Assert.AreEqual(clonemetadta.Locus.Name, metadata.Locus.Name);
            Assert.AreEqual(clonemetadta.Locus.SequenceLength, metadata.Locus.SequenceLength);
            Assert.AreEqual(clonemetadta.Locus.SequenceType, metadata.Locus.SequenceType);
            Assert.AreEqual(clonemetadta.Locus.Strand, metadata.Locus.Strand);
            Assert.AreEqual(clonemetadta.Locus.StrandTopology, metadata.Locus.StrandTopology);

            // Compare against the original; the clone was previously compared with itself,
            // which could never fail.
            Assert.AreEqual(clonemetadta.Origin, metadata.Origin);
            Assert.AreEqual(clonemetadta.Primary, metadata.Primary);
            Assert.AreEqual(clonemetadta.Project.Name, metadata.Project.Name);

            Assert.AreEqual(clonemetadta.Project.Numbers.Count, metadata.Project.Numbers.Count);
            for (int i = 0; i < clonemetadta.Project.Numbers.Count; i++)
            {
                Assert.AreEqual(clonemetadta.Project.Numbers[i], metadata.Project.Numbers[i]);
            }

            Assert.AreEqual(clonemetadta.References.Count, metadata.References.Count);
            for (int i = 0; i < clonemetadta.References.Count; i++)
            {
                Assert.AreEqual(clonemetadta.References[i].Authors, metadata.References[i].Authors);
                Assert.AreEqual(clonemetadta.References[i].Consortiums, metadata.References[i].Consortiums);
                Assert.AreEqual(clonemetadta.References[i].Journal, metadata.References[i].Journal);
                Assert.AreEqual(clonemetadta.References[i].Location, metadata.References[i].Location);
                Assert.AreEqual(clonemetadta.References[i].Medline, metadata.References[i].Medline);
                Assert.AreEqual(clonemetadta.References[i].Number, metadata.References[i].Number);
                Assert.AreEqual(clonemetadta.References[i].PubMed, metadata.References[i].PubMed);
                Assert.AreEqual(clonemetadta.References[i].Remarks, metadata.References[i].Remarks);
                Assert.AreEqual(clonemetadta.References[i].Title, metadata.References[i].Title);
            }

            Assert.AreEqual(clonemetadta.Segment.Current, metadata.Segment.Current);
            Assert.AreEqual(clonemetadta.Segment.Count, metadata.Segment.Count);
            Assert.AreEqual(clonemetadta.Source.CommonName, metadata.Source.CommonName);
            Assert.AreEqual(clonemetadta.Source.Organism.ClassLevels, metadata.Source.Organism.ClassLevels);
            Assert.AreEqual(clonemetadta.Source.Organism.Genus, metadata.Source.Organism.Genus);
            Assert.AreEqual(clonemetadta.Source.Organism.Species, metadata.Source.Organism.Species);
            Assert.AreEqual(clonemetadta.Version.Accession, metadata.Version.Accession);
            Assert.AreEqual(clonemetadta.Version.CompoundAccession, metadata.Version.CompoundAccession);
            Assert.AreEqual(clonemetadta.Version.GINumber, metadata.Version.GINumber);
            Assert.AreEqual(clonemetadta.Version.Version, metadata.Version.Version);
        }
コード例 #15
0
        /// <summary>
        /// Flattens GenBank metadata into rows of cell text: the metadata fields first,
        /// then the features with their qualifiers, then the base count.
        /// </summary>
        /// <param name="metadata">GenBank metadata to flatten.</param>
        /// <returns>Two-dimensional string array built from the collected rows.</returns>
        public static string[,] GenBankMetadataToRange(GenBankMetadata metadata)
        {
            var rows = new List <string[]>();

            // Header row of the metadata section.
            rows.Add(new[] { Properties.Resources.GenbankMetadataHeader });

            // LOCUS: a title row followed by one indented row per locus field.
            if (metadata.Locus != null)
            {
                rows.Add(new[] { Properties.Resources.GenbankMetadataLocus });
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataName, metadata.Locus.Name);
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataSeqLength, metadata.Locus.SequenceLength.ToString());
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataSeqType, metadata.Locus.SequenceType.ToString());
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataStrandType, Helper.GetStrandType(metadata.Locus.Strand));
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataMoleculeType, metadata.Locus.MoleculeType.ToString());
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataStrandTopology, Helper.GetStrandTopology(metadata.Locus.StrandTopology));
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataDivisionCode, metadata.Locus.DivisionCode.ToString());
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataDate, metadata.Locus.Date.ToString("dd-MMM-yyyy").ToUpper());
            }

            if (!string.IsNullOrWhiteSpace(metadata.Definition))
            {
                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataDefinition, "", metadata.Definition);
            }

            // ACCESSION: the primary accession followed by " <secondary>" for every secondary
            // entry; a null entry still contributes its leading space.
            if (metadata.Accession != null)
            {
                var secondaryText = new StringBuilder();
                foreach (string secondary in metadata.Accession.Secondary)
                {
                    secondaryText.Append(" ").Append(secondary);
                }

                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataAccession, "", metadata.Accession.Primary + secondaryText.ToString());
            }

            // DBLINK: "<type>:" followed by every number, each terminated by a comma.
            if (metadata.DbLink != null)
            {
                var numbersText = new StringBuilder();
                foreach (string linkNumber in metadata.DbLink.Numbers)
                {
                    numbersText.Append(linkNumber).Append(",");
                }

                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataDBLink, "", metadata.DbLink.Type.ToString() + ":" + numbersText.ToString());
            }

            if (!string.IsNullOrWhiteSpace(metadata.DbSource))
            {
                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataDBSource, "", metadata.DbSource);
            }

            // VERSION: "<accession>.<version> <GI label><gi number>", with nulls shown as empty.
            if (metadata.Version != null)
            {
                string versionText = (metadata.Version.Accession ?? string.Empty) + "." +
                                     (metadata.Version.Version ?? string.Empty) + " " +
                                     Properties.Resources.GenbankMetadataGI + (metadata.Version.GiNumber ?? string.Empty);
                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataVersion, "", versionText);
            }

            if (metadata.Segment != null)
            {
                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataSegment, "", metadata.Segment.Current + " of " + metadata.Segment.Count);
            }

            // Keywords are written unconditionally.
            AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataKeywords, "", metadata.Keywords);

            // SOURCE: common name, then "<genus> <species>" and the class levels.
            if (metadata.Source != null)
            {
                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataSource, "",
                                 metadata.Source.CommonName ?? string.Empty);
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataOrganism,
                                 (metadata.Source.Organism.Genus ?? string.Empty) + " " +
                                 (metadata.Source.Organism.Species ?? string.Empty));
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataClassLevels,
                                 metadata.Source.Organism.ClassLevels ?? string.Empty);
            }

            // REFERENCE: one title row per citation followed by its fields.
            foreach (CitationReference citation in metadata.References)
            {
                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataReference, "", citation.Number.ToString() + " (" + citation.Location + ")");
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataAuthors, citation.Authors);
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataTitle, citation.Title);
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataJournal, citation.Journal);
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataConsortiums, citation.Consortiums);
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataMedLine, citation.Medline);
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataPubMed, citation.PubMed);
                AddNameValuePair(rows, 1, Properties.Resources.GenbankMetadataRemarks, citation.Remarks);
            }

            if (!string.IsNullOrWhiteSpace(metadata.Primary))
            {
                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataPrimary, "", metadata.Primary);
            }

            // COMMENT: all comments in one cell, separated by line breaks.
            if (metadata.Comments != null && metadata.Comments.Count > 0)
            {
                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataComment, "", string.Join(Environment.NewLine, metadata.Comments));
            }

            if (metadata.Features != null)
            {
                // Header row of the features section.
                rows.Add(new[] { Properties.Resources.GenbankFeaturesHeader });

                foreach (FeatureItem featureItem in metadata.Features.All)
                {
                    LocationBuilder locBuilder = new LocationBuilder();

                    // Feature row: key, one skipped column, then the location text.
                    rows.Add(new[] { featureItem.Key, "", locBuilder.GetLocationString(featureItem.Location) });

                    // One indented row per qualifier value.
                    foreach (var qualifier in featureItem.Qualifiers)
                    {
                        foreach (string qualifierValue in qualifier.Value)
                        {
                            AddNameValuePair(rows, 1, qualifier.Key, qualifierValue);
                        }
                    }
                }
            }

            if (!string.IsNullOrWhiteSpace(metadata.BaseCount))
            {
                AddNameValuePair(rows, 0, Properties.Resources.GenbankMetadataBaseCount, "", metadata.BaseCount);
            }

            return ConvertToArray(rows);
        }
コード例 #16
0
ファイル: NcbiHelper.cs プロジェクト: unclave/libiada-web
        /// <summary>
        /// Downloads the GenBank sequence with the given id and returns all of its features.
        /// </summary>
        /// <param name="id">
        /// Id passed to <c>DownloadGenBankSequence</c> to fetch the sequence.
        /// </param>
        /// <returns>
        /// The <see cref="List{FeatureItem}"/> held in the metadata's <c>Features.All</c>.
        /// </returns>
        public static List <FeatureItem> GetFeatures(string id)
        {
            // Download first, then read the metadata from the downloaded sequence.
            var downloadedSequence = DownloadGenBankSequence(id);
            GenBankMetadata genBankMetadata = GetMetadata(downloadedSequence);
            List <FeatureItem> allFeatures = genBankMetadata.Features.All;

            return allFeatures;
        }
コード例 #17
0
        /// <summary>
        /// Formats a sequence to a temporary GenBank file through a TextWriter, parses the
        /// file back and validates the round-tripped properties, metadata and sequence text.
        /// The temporary file is removed whether or not the validation succeeds.
        /// </summary>
        public void GenBankFormatterValidateFormatTextWriter()
        {
            InitializeXmlVariables();
            // Create a Sequence with all attributes.
            // parse and update the properties instead of parsing entire file.
            ISequenceParser   parser1  = new GenBankParser();
            IList <ISequence> seqList1 = parser1.Parse(FilePath);

            string expectedUpdatedSequence =
                ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
            Sequence orgSeq =
                new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);

            orgSeq.Metadata.Add("GenBank",
                                (GenBankMetadata)seqList1[0].Metadata["GenBank"]);
            orgSeq.ID           = seqList1[0].ID;
            orgSeq.DisplayID    = seqList1[0].DisplayID;
            orgSeq.MoleculeType = seqList1[0].MoleculeType;

            ISequenceFormatter formatter = new GenBankFormatter();

            try
            {
                using (TextWriter writer =
                           new StreamWriter(Constants.GenBankTempFileName))
                {
                    formatter.Format(orgSeq, writer);
                }

                // parse
                GenBankParser     parserObj = new GenBankParser();
                IList <ISequence> seqList   = parserObj.Parse(Constants.GenBankTempFileName);

                ISequence seq = seqList[0];

                // test the non-metadata properties
                if (0 == string.Compare(IsSequenceReadOnly, "true",
                                        false, CultureInfo.CurrentCulture))
                {
                    Assert.IsTrue(seq.IsReadOnly);
                    ApplicationLog.WriteLine(
                        "GenBank Formatter BVT: Successfully validated the ReadOnly Property");
                }

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
                Assert.AreEqual(Utility.GetMoleculeType(MolType), seq.MoleculeType);
                Assert.AreEqual(SeqId, seq.DisplayID);
                Assert.AreEqual(SeqId, seq.ID);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

                // The strand type is only compared when the parsed value is not None.
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture), metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
                ApplicationLog.WriteLine(
                    "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, seq.ToString());
                ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
                Console.WriteLine(string.Format((IFormatProvider)null, "GenBank Formatter BVT: Successfully validated the Sequence '{0}'", ExpectedSequence));
            }
            finally
            {
                // Remove the temporary file even when formatting, parsing or an assertion
                // fails, so a failed run does not leave it behind for later tests.
                File.Delete(Constants.GenBankTempFileName);
            }
        }
コード例 #18
0
        /// <summary>
        /// Creates the collection items for one query sequence from its saved BLAST XML report.
        /// One item is created for every HSP that passes the thresholds in <paramref name="Up"/>;
        /// if a record has no hits, or none of its HSPs pass, a single placeholder item is created instead.
        /// </summary>
        /// <param name="Up">
        /// Run parameters. This method reads ProjectDir, BlastMinPercentIdentity, BlastMaxEvalue,
        /// BlastMinPercentQueryCoverage, BlastMaxNumHits, BlastProgram and FacetCategories.
        /// </param>
        /// <param name="rec">
        /// Query sequence. Its DisplayID is split by parseFastaHeader into the item name and description,
        /// and rec.ToString() becomes the value of the "QuerySequence" facet.
        /// </param>
        /// <param name="itemId">Id given to the first item created; incremented by one for every item added.</param>
        /// <param name="seqPos">
        /// Position of the query; selects the report file "&lt;ProjectDir&gt;\xml\&lt;seqPos&gt;.xml" and is the
        /// value of the "InputOrder" facet.
        /// </param>
        /// <param name="collection">Collection whose Items list receives the created items.</param>
        /// <returns>The value of <paramref name="itemId"/> after all increments, i.e. the next unused id.</returns>
        /// <exception cref="Exception">Thrown when the BLAST report file does not exist.</exception>
        public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
        {
            // NOTE(review): queryName is never read in this method; the statement still dereferences
            // rec.DisplayID, so it fails early if DisplayID is null.
            string queryName = rec.DisplayID.ToString().Split(' ')[0];

            // BLAST reports are saved in individual files by query and
            // numbered in the same order as they appear in the input FASTA file.
            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";

            if (!File.Exists(blastFile))
            {
                throw new Exception("File does not exist.");
            }
            BlastXmlParser      blastParser  = new BlastXmlParser();
            IList <BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser       gbParser     = new GenBankParser();

            // Two-element array compared below against (hit index, HSP index); the pair (-1, -1) is
            // handled as "no annotated hit" by the "Annotated" facet logic.
            int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    // Number of items created for this record; also enforces Up.BlastMaxNumHits.
                    int hitsProcessed = 0;
                    // 1-based position of an accepted HSP among the accepted HSPs of this record.
                    int rank = 0;
                    if (record.Hits.Count() > 0)
                    {
                        // For each hit
                        for (int i = 0; i < record.Hits.Count(); i++)
                        {
                            Hit blastHit = record.Hits[i];
                            // For each HSP
                            for (int j = 0; j < blastHit.Hsps.Count(); j++)
                            {
                                Hsp    blastHsp      = blastHit.Hsps[j];
                                // Both values are percentages (0-100).
                                double percentId     = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                                double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;
                                // NOTE(review): txt is built but never used afterwards (looks like leftover debugging).
                                string txt           = String.Format("{0} {1} {2} {3} {4} {5} {6} {7}", percentId, Up.BlastMinPercentIdentity,
                                                                     Up.BlastMaxEvalue, blastHsp.EValue, queryCoverage, Up.BlastMinPercentQueryCoverage,
                                                                     hitsProcessed, Up.BlastMaxNumHits);
                                // if HSP passes user-defined thresholds
                                if ((percentId >= Up.BlastMinPercentIdentity) &&
                                    (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                    (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                    (hitsProcessed < Up.BlastMaxNumHits))
                                {
                                    rank += 1;
                                    // Describes the score of the first HSP of the NEXT hit relative to this HSP:
                                    // "less than", "equal" (also used when it is higher), or "non existent"
                                    // when this is the last hit.
                                    string nextScore = "no";
                                    if ((i + 1) < record.Hits.Count())
                                    {
                                        if (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score)
                                        {
                                            nextScore = "less than";
                                        }
                                        else
                                        {
                                            nextScore = "equal";
                                        }
                                    }
                                    else
                                    {
                                        nextScore = "non existent";
                                    }

                                    // parse GI number from hit: the second '|'-separated field of the hit id
                                    long        gi     = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                                    GenBankItem gitem  = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                                    // GenBank record file: <ProjectDir>\gb\<id>_<hitStart>_<hitEnd>.gb
                                    string      gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                                    gbFile += "_" + gitem.HitStart.ToString();
                                    gbFile += "_" + gitem.HitEnd.ToString();
                                    gbFile += ".gb";
                                    // init item
                                    string   img          = "#" + itemId.ToString();
                                    Item     item         = new Item(itemId, img);
                                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                                    item.Name        = headerTokens[0];
                                    item.Description = headerTokens[1];

                                    // write pairwise alignment
                                    writePairwiseAlignment(Up, blastHit, j, itemId);

                                    // try to parse the GB record associated with the hit and set facet values to data from BLAST/GB record
                                    try
                                    {
                                        // NOTE(review): "GB OK" is printed before the parse is attempted, and it
                                        // reports the id of the first hit (Hits[0]) rather than the current hit.
                                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                                        GenBankMetadata     gbMeta   = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                        CodingSequence      bestCds  = null;
                                        IList <FeatureItem> features = gbMeta.Features.All;
                                        FeatureItem         bestItem = getBestFeatureItem(features);


                                        // NOTE(review): bestCds is assigned here and in the loop below but is not
                                        // read anywhere else in this method; facets are built from bestItem.
                                        if (gbMeta.Features.CodingSequences.Count > 0)
                                        {
                                            bestCds = gbMeta.Features.CodingSequences[0];
                                        }

                                        for (int k = 1; k < gbMeta.Features.CodingSequences.Count; k++)
                                        {
                                            CodingSequence cds = gbMeta.Features.CodingSequences[k];
                                            //int bestSize = Math.Abs(bestCds.Location.End - bestCds.Location.Start);
                                            // NOTE(review): this dereferences bestItem, which the "Annotated" case below
                                            // treats as possibly null; with a null bestItem and two or more coding
                                            // sequences this line throws and the item takes the catch path - confirm intent.
                                            int bestSize = Math.Abs(bestItem.Location.End - bestItem.Location.Start);
                                            int cdsSize  = Math.Abs(cds.Location.End - cds.Location.Start);
                                            if (cdsSize > bestSize)
                                            {
                                                bestCds = cds;
                                            }
                                        }
                                        // Build one facet per configured category. A few categories are computed
                                        // here; everything else is delegated to CreateFacet.
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                            case "InputOrder":
                                                facet = new Facet(f.Name, f.Type, seqPos);
                                                break;

                                            case "QuerySequence":
                                                facet = new Facet(f.Name, f.Type, rec.ToString());
                                                break;

                                            case "NextScore":
                                                facet = new Facet(f.Name, f.Type, nextScore);
                                                break;

                                            case "Annotated":
                                                // top_annotated:   this (hit, HSP) pair equals annotatedIndex
                                                // top_unannotated: first HSP of first hit and annotatedIndex is (-1, -1)
                                                // annotated / unannotated: depends on whether a best feature was found
                                                string value = "na";
                                                if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                {
                                                    value = "top_annotated";
                                                }
                                                else
                                                {
                                                    if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                    {
                                                        value = "top_unannotated";
                                                    }
                                                    else
                                                    {
                                                        if (bestItem != null)
                                                        {
                                                            value = "annotated";
                                                        }
                                                        else
                                                        {
                                                            value = "unannotated";
                                                        }
                                                    }
                                                }
                                                facet = new Facet(f.Name, f.Type, value);
                                                break;

                                            default:
                                                //facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestCds, rank);
                                                facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                                break;
                                            }

                                            /*
                                             * if (f.Name == "InputOrder")
                                             * {
                                             *  facet = new Facet(f.Name, f.Type, seqPos);
                                             * }
                                             *
                                             * else
                                             * {
                                             *  facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item);
                                             * }
                                             */
                                            item.Facets.Add(facet);
                                        }
                                    }
                                    //catch (System.NullReferenceException e) // if parsing failed init the item w/ default values (similar to 'no hit' above)
                                    // Any exception from the block above (parsing or facet creation) lands here and
                                    // the item is filled with facets that do not need the GenBank record.
                                    // NOTE(review): facets already added to item.Facets before the failure are not
                                    // removed here, so a failure part-way through the loop above appears to leave
                                    // those facets in place in addition to the full set added below - confirm.
                                    catch
                                    {
                                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        item.Href = "#";
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                            case ("InputOrder"):
                                                facet = new Facet(f.Name, f.Type, seqPos);
                                                break;

                                            case "QuerySequence":
                                                facet = new Facet(f.Name, f.Type, rec.ToString());
                                                break;

                                            case ("NextScore"):
                                                // The computed nextScore is not used on this path; the value is always "no".
                                                facet = new Facet(f.Name, f.Type, "no");
                                                break;

                                            case "Annotated":
                                                // Same rules as the success path, except that without a usable GenBank
                                                // record the fallback value is always "unannotated".
                                                string value = "na";
                                                if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                {
                                                    value = "top_annotated";
                                                }
                                                else
                                                {
                                                    if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                    {
                                                        value = "top_unannotated";
                                                    }
                                                    else
                                                    {
                                                        value = "unannotated";
                                                    }
                                                }
                                                facet = new Facet(f.Name, f.Type, value);
                                                break;

                                            default:
                                                facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                                break;
                                            }
                                            item.Facets.Add(facet);
                                        }
                                        //throw (e);
                                    }
                                    // Add item to collection, increment to next item,
                                    collection.Items.Add(item);
                                    hitsProcessed += 1;
                                    itemId        += 1;
                                }
                            }
                        }
                    }
                    // No hits at all, or no HSP passed the thresholds: emit one placeholder item for the query.
                    if ((record.Hits.Count()) == 0 || (hitsProcessed == 0))
                    {
                        // Init Pivot item
                        string img  = "#" + itemId.ToString();
                        Item   item = new Item(itemId, img);
                        item.Href = "#";
                        string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                        item.Name        = headerTokens[0];
                        item.Description = headerTokens[1];

                        // Write pairwise alignment to file.
                        writePairwiseAlignment(Up, itemId);

                        // Set facet values for each facet category to default values
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet = new Facet();
                            switch (f.Name)
                            {
                            case ("InputOrder"):
                                facet = new Facet(f.Name, f.Type, seqPos);
                                break;

                            case ("QuerySequence"):
                                facet = new Facet(f.Name, f.Type, rec.ToString());
                                break;

                            default:
                                facet = CreateFacet(f.Name, f.Type, record, item, 0);
                                break;
                            }
                            item.Facets.Add(facet);
                        }

                        // Add item to collection and advance to the next item id.
                        collection.Items.Add(item);
                        itemId        += 1;
                        // NOTE(review): hitsProcessed is re-initialised for every record and not read again
                        // after this point, so this increment has no visible effect.
                        hitsProcessed += 1;
                    }
                }
            }
            return(itemId);
        }
コード例 #19
0
        /// <summary>
        /// Searches the features of a GenBank record for a qualifier and returns its first value
        /// with any leading and trailing double quotes removed.
        /// </summary>
        /// <param name="featureItemKey">Only features whose Key equals this value are inspected.</param>
        /// <param name="qualifierKey">Key of the qualifier to look for on those features.</param>
        /// <param name="gbMeta">GenBank metadata whose Features.All list is scanned in order.</param>
        /// <returns>
        /// The first value of the first matching qualifier, or "N/A" when no inspected feature carries it.
        /// </returns>
        /// <remarks>
        /// Every qualifier visited on a matching feature is echoed to the console, together with a
        /// header line on entry.
        /// </remarks>
        public static string GetQualifierString(string featureItemKey, string qualifierKey, GenBankMetadata gbMeta)
        {
            Console.WriteLine("In GetQualifierString: " + featureItemKey + " " + qualifierKey);

            foreach (FeatureItem feature in gbMeta.Features.All)
            {
                if (feature.Key != featureItemKey)
                {
                    continue;
                }

                foreach (KeyValuePair<string, List<string>> entry in feature.Qualifiers)
                {
                    // The first value is read for the trace line before the key is compared,
                    // exactly as the trace has always been produced.
                    string firstValue = entry.Value[0];
                    Console.WriteLine(featureItemKey + " " + entry.Key + " " + firstValue);

                    if (entry.Key != qualifierKey)
                    {
                        continue;
                    }

                    return firstValue.Trim('"');
                }
            }

            return "N/A";
        }
コード例 #20
0
        /// <summary>
        /// Returns the GI number recorded in the version information of a parsed GenBank sequence.
        /// </summary>
        /// <param name="gb">Sequence whose Metadata contains a "GenBank" entry of type GenBankMetadata.</param>
        /// <returns>The Version.GINumber value of that metadata entry.</returns>
        public static string GetGenBankIdentifier(ISequence gb)
        {
            object rawMetadata = gb.Metadata["GenBank"];

            return ((GenBankMetadata)rawMetadata).Version.GINumber;
        }
コード例 #21
0
ファイル: Construct.cs プロジェクト: KatarzynaRzosinska/Skaza
        /// <summary>
        /// Initializes the construct from an ordered fragment list: joins the fragment sequences into
        /// one DNA sequence with GenBank metadata, and fills <c>Overlaps</c> with one "_fwd" entry per
        /// fragment followed by one "_rev" entry per fragment, then calls TermoOptimizeOverlaps.
        /// </summary>
        /// <param name="fragList">
        /// Fragments in assembly order. The list is modified: a copy of its first fragment is appended
        /// and the original first element is removed before the reverse entries are built.
        /// </param>
        /// <param name="settings">
        /// Designer settings. MaxOverlapLen caps the overlapping part and MaxGeneSpecificLen caps the
        /// gene-specific part of every entry.
        /// </param>
        private void Init(ObservableCollection<Fragment> fragList, DesignerSettings settings)
        {
            this.Overlaps = new List<Overlap>();
            this.Settings = settings;

            // Forward pass: walk the fragments left to right, growing the joined sequence.
            string joined = "";
            string constructName = "";
            List<MiscFeature> geneFeatures = new List<MiscFeature>();

            for (int index = 0; index < fragList.Count; index++)
            {
                Fragment current = fragList[index];
                constructName += current.Name;
                string fragmentSeq = current.GetString();

                // The overlapping part is the tail of everything joined so far; the gene-specific
                // part is the head of the current fragment.
                int overlapLen = Math.Min(settings.MaxOverlapLen, joined.Length);
                int specificLen = Math.Min(settings.MaxGeneSpecificLen, fragmentSeq.Length);
                string overlapPart = joined.Substring(joined.Length - overlapLen, overlapLen);
                string specificPart = fragmentSeq.Substring(0, specificLen);

                // 1-based "start..end" location of this fragment inside the joined sequence.
                int firstBase = joined.Length + 1;
                int lastBase = joined.Length + fragmentSeq.Length;
                MiscFeature gene = new MiscFeature(firstBase.ToString() + ".." + lastBase.ToString());
                gene.StandardName = current.Name;
                geneFeatures.Add(gene);

                joined += fragmentSeq;

                // The first fragment has nothing in front of it, so it has no overlapping part.
                Overlap forward = (index == 0)
                    ? new Overlap(current.Name + "_fwd", new Sequence(Alphabets.DNA, specificPart))
                    : new Overlap(current.Name + "_fwd", new Sequence(Alphabets.DNA, overlapPart), new Sequence(Alphabets.DNA, specificPart));
                Overlaps.Add(forward);
            }

            this.Sequence = new Sequence(Alphabets.DNA, joined);

            // GenBank metadata describing the joined construct.
            GenBankLocusInfo locus = new GenBankLocusInfo();
            locus.MoleculeType = MoleculeType.DNA;
            locus.Name = constructName;
            locus.Date = System.DateTime.Now;
            locus.SequenceLength = joined.Length;

            GenBankMetadata meta = new GenBankMetadata();
            meta.Locus = locus;
            meta.Comments.Add("designed with mufasa");
            meta.Definition = "synthetic construct";
            meta.Features = new SequenceFeatures();
            meta.Features.All.AddRange(geneFeatures);
            this.Sequence.Metadata.Add("GenBank", meta);

            // Reverse pass: move the first fragment to the end, then walk the list right to left
            // using reverse-complement sequences.
            fragList.Add(new Fragment(fragList[0]));
            fragList.RemoveAt(0);

            string joinedReverse = "";
            for (int index = fragList.Count - 1; index >= 0; index--)
            {
                Fragment current = fragList[index];
                string reverseComplement = current.GetReverseComplementString();

                int overlapLen = Math.Min(settings.MaxOverlapLen, joinedReverse.Length);
                int specificLen = Math.Min(settings.MaxGeneSpecificLen, reverseComplement.Length);
                string overlapPart = joinedReverse.Substring(joinedReverse.Length - overlapLen, overlapLen);
                string specificPart = reverseComplement.Substring(0, specificLen);

                joinedReverse += reverseComplement;

                // The first fragment visited (the last in the list) has no overlapping part.
                Overlap reverse = (index == fragList.Count - 1)
                    ? new Overlap(current.Name + "_rev", new Sequence(Alphabets.DNA, specificPart))
                    : new Overlap(current.Name + "_rev", new Sequence(Alphabets.DNA, overlapPart), new Sequence(Alphabets.DNA, specificPart));
                Overlaps.Add(reverse);
            }

            TermoOptimizeOverlaps();
        }
コード例 #22
0
        /// <summary>
        /// Builds the facet called <paramref name="fName"/> for a single BLAST HSP. Depending on the
        /// category, the value comes from the BLAST record, from the GenBank metadata of the hit, or
        /// from the qualifiers of <paramref name="bestItem"/>.
        /// </summary>
        /// <param name="fName">Facet category name; selects which value is produced (see the switch below).</param>
        /// <param name="fType">Facet type, passed through unchanged to the Facet constructor.</param>
        /// <param name="rec">BLAST search record that contains the hit.</param>
        /// <param name="hitId">Index of the hit in <c>rec.Hits</c>.</param>
        /// <param name="hspId">Index of the HSP in the hit's <c>Hsps</c> list.</param>
        /// <param name="gb">Parsed GenBank sequence; its "GenBank" metadata entry is read.</param>
        /// <param name="item">Item the facet belongs to; its Name and Id are read.</param>
        /// <param name="NCBIurl">URL prefix put in front of the GI number / accession to build links.</param>
        /// <param name="bestItem">Feature used for the gene, product and function lookups; may be null.</param>
        /// <param name="rank">Value of the "Rank" facet.</param>
        /// <returns>The facet for the requested category.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="fName"/> is not a known category.</exception>
        public static Facet CreateFacet(string fName, string fType, BlastSearchRecord rec, int hitId, int hspId, ISequence gb, Item item, string NCBIurl, FeatureItem bestItem, int rank)
        {
            // Resolved up front for every category, so a bad index or missing metadata fails here.
            Hit             hit    = rec.Hits[hitId];
            Hsp             hsp    = hit.Hsps[hspId];
            GenBankMetadata gbMeta = (GenBankMetadata)gb.Metadata["GenBank"];

            switch (fName)
            {
                case "QueryName":
                    return new Facet(fName, fType, item.Name);

                case "QueryLen":
                    return new Facet(fName, fType, rec.IterationQueryLength);

                case "Rank":
                    return new Facet(fName, fType, rank);

                case "Score":
                    return new Facet(fName, fType, Math.Round(hsp.BitScore, 1));

                case "Identity":
                    // Percentage of identical positions over the alignment length.
                    double percentIdentity = (hsp.IdentitiesCount / (double)hsp.AlignmentLength) * 100.0;
                    return new Facet(fName, fType, Math.Round(percentIdentity, 0));

                case "Span":
                    // Percentage of the query covered by this HSP.
                    double span = ((hsp.QueryEnd - hsp.QueryStart + 1) / (double)rec.IterationQueryLength) * 100.0;
                    return new Facet(fName, fType, Math.Round(span, 0));

                case "SubjStart":
                    double subjStart = hsp.HitStart;
                    return new Facet(fName, fType, Math.Round(subjStart, 0));

                case "SubjLen":
                    double subjLen = hit.Length;
                    return new Facet(fName, fType, Math.Round(subjLen, 0));

                case "Strand":
                    string strand = FrameToStrand(hsp.QueryFrame) + "/" + FrameToStrand(hsp.HitFrame);
                    return new Facet(fName, fType, strand);

                case "Species":
                    // Keep only the first word of the species text when it contains a space.
                    string species = gbMeta.Source.Organism.Species;
                    int spaceAt = species.IndexOf(" ", StringComparison.Ordinal);
                    if (spaceAt > 0)
                    {
                        species = species.Substring(0, spaceAt);
                    }
                    return new Facet(fName, fType, gbMeta.Source.Organism.Genus + " " + species);

                // Taxonomic ranks are taken by position from the ';'-separated ClassLevels string.
                case "Kingdom":
                    return CreateClassLevelFacet(fName, fType, gbMeta, 0);

                case "Phylum":
                    return CreateClassLevelFacet(fName, fType, gbMeta, 1);

                case "Class":
                    return CreateClassLevelFacet(fName, fType, gbMeta, 2);

                case "Order":
                    return CreateClassLevelFacet(fName, fType, gbMeta, 3);

                case "Family":
                    return CreateClassLevelFacet(fName, fType, gbMeta, 4);

                case "Lineage":
                    return new Facet(fName, fType, gbMeta.Source.Organism.ClassLevels.ToString());

                case "Organism":
                    return new Facet(fName, fType, gbMeta.Source.CommonName);

                case "Genus":
                    // Last ClassLevels entry, trimmed and without a trailing '.'.
                    return new Facet(fName, fType, gbMeta.Source.Organism.ClassLevels.Split(';').Last().Trim().TrimEnd('.'));

                case "Gene":
                    // Gene symbol from the "gene" qualifier of the best feature (the last one wins).
                    // null means "no such qualifier", so a missing symbol can no longer be mistaken
                    // for a real one and linked as a search for "N/A".
                    string geneSym = null;
                    if (bestItem != null)
                    {
                        foreach (KeyValuePair<string, List<string>> qualifier in bestItem.Qualifiers)
                        {
                            if (qualifier.Key == "gene")
                            {
                                geneSym = qualifier.Value[0].ToString().Trim('"');
                            }
                        }
                    }

                    if (string.IsNullOrEmpty(geneSym))
                    {
                        return new Facet(fName, fType, "N/A", null);
                    }

                    // The symbol comes from an external record, so it is URL-encoded before it is
                    // placed in the query string; the complete URL is then HTML-encoded as before.
                    string geneUrl = System.Web.HttpUtility.HtmlEncode(
                        "http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=search&db=gene&term="
                        + System.Web.HttpUtility.UrlEncode(geneSym)
                        + "%5Bsym%5D");
                    return new Facet(fName, fType, geneSym, geneUrl);

                case "GI":
                    return new Facet(fName, fType, gbMeta.Version.GINumber, NCBIurl + gbMeta.Version.GINumber);

                case "Accession":
                    return new Facet(fName, fType, gbMeta.Version.CompoundAccession, NCBIurl + gbMeta.Version.CompoundAccession);

                case "Definition":
                    return new Facet(fName, fType, gbMeta.Definition);

                case "EValue":
                    return new Facet(fName, fType, String.Format("{0:#e+00}", hsp.EValue));

                case "AlignLen":
                    // Links to the pairwise alignment text file written for this item.
                    return new Facet(fName, fType, hsp.AlignmentLength, @"txt\" + item.Id + ".txt");

                case "RefCount":
                    int publishedCount = 0;
                    foreach (CitationReference reference in gbMeta.References)
                    {
                        if (IsPublishedReference(reference))
                        {
                            publishedCount++;
                        }
                    }
                    return new Facet(fName, fType, publishedCount);

                case "References":
                    // One numbered value per published reference; the facet is left empty when the
                    // record has no references or none of them is published.
                    Facet referencesFacet = new Facet(fName, fType);
                    int referenceNumber = 1;
                    foreach (CitationReference reference in gbMeta.References)
                    {
                        if (IsPublishedReference(reference))
                        {
                            string referenceUrl = CreateReferenceURL(reference);
                            referencesFacet.Add(new FacetValue(referencesFacet.Type, String.Format("{0}. {1}. {2}.", referenceNumber, reference.Title, reference.Journal), referenceUrl));
                            referenceNumber++;
                        }
                    }
                    return referencesFacet;

                case "SubmissionDate":
                    // Build the date as midnight UTC. A DateTime of unspecified kind would be treated
                    // as local time by ToUniversalTime(), shifting the date by the machine's UTC offset.
                    DateTime submitted = new DateTime(gbMeta.Locus.Date.Year, gbMeta.Locus.Date.Month, gbMeta.Locus.Date.Day, 0, 0, 0, DateTimeKind.Utc);
                    return new Facet(fName, fType, submitted.ToString("o"));

                case "Product":
                    // Prefer the "product" qualifier of a "Protein" feature; fall back to the best feature.
                    Facet productFacet = new Facet(fName, fType, GetQualifierString("Protein", "product", gbMeta));

                    if (productFacet[0].Value == "N/A")
                    {
                        // Diagnostic trace lines kept as they were.
                        Console.WriteLine(productFacet[0].Value + "!!!!!!!!!!!!!!!!!!!!!!!!!!***********");
                        if (bestItem != null)
                        {
                            productFacet = new Facet(fName, fType, GetQualifierStringFromCDS(bestItem, "product"));
                        }
                        Console.WriteLine(productFacet[0].Value + "!!!!!!!!!!!!!!!!!!!!!!!!&&&&&&&&&&&&&");
                    }
                    return productFacet;

                case "Function":
                    // Same lookup order as "Product", for the "function" qualifier.
                    Facet funcFacet = new Facet(fName, fType, GetQualifierString("Protein", "function", gbMeta));
                    if (funcFacet[0].Value == "N/A" && bestItem != null)
                    {
                        funcFacet = new Facet(fName, fType, GetQualifierStringFromCDS(bestItem, "function"));
                    }
                    return funcFacet;

                default:
                    throw new ArgumentException("Facet category with name = " + fName + " does not exist.", "fName");
            }
        }

        /// <summary>
        /// Builds a facet from the entry at position <paramref name="level"/> of the ';'-separated
        /// ClassLevels string, or "N/A" when the string has fewer entries. Entries are used as split,
        /// without trimming.
        /// </summary>
        private static Facet CreateClassLevelFacet(string fName, string fType, GenBankMetadata gbMeta, int level)
        {
            string[] classLevels = gbMeta.Source.Organism.ClassLevels.Split(';');
            string value = level < classLevels.Length ? classLevels[level] : "N/A";

            return new Facet(fName, fType, value);
        }

        /// <summary>
        /// Tells whether a citation counts as published: its title is not "Direct Submission" and
        /// its journal is not "Unpublished".
        /// </summary>
        private static bool IsPublishedReference(CitationReference reference)
        {
            return (reference.Title != "Direct Submission") && (reference.Journal != "Unpublished");
        }
        /// <summary>
        /// Searches NCBI for <paramref name="searchQuery"/>, then downloads and stores every
        /// found GenBank sequence that is not already present in the database.
        /// </summary>
        /// <param name="searchQuery">Search text passed to <c>NcbiHelper.FormatNcbiSearchTerm</c>.</param>
        /// <param name="importGenes">
        /// When true, <c>ImportFeatures</c> is called for every newly created sequence and its
        /// outcome becomes the result/status of that import.
        /// </param>
        /// <param name="importPartial">
        /// Forwarded to <c>NcbiHelper.ExecuteESummaryRequest</c>
        /// (presumably controls whether partial sequences are kept; confirm against the helper).
        /// </param>
        /// <param name="filterMinLength">When true, <paramref name="minLength"/> is added to the search term.</param>
        /// <param name="minLength">Lower length bound; only used when <paramref name="filterMinLength"/> is true.</param>
        /// <param name="filterMaxLength">When true, <paramref name="maxLength"/> is added to the search term.</param>
        /// <param name="maxLength">Upper length bound; only used when <paramref name="filterMaxLength"/> is true.</param>
        /// <returns>
        /// The result of <c>CreateTask</c>; the task body yields a dictionary whose "data" entry is
        /// the JSON-serialized list of per-accession import results.
        /// </returns>
        public ActionResult Index(
            string searchQuery,
            bool importGenes,
            bool importPartial,
            bool filterMinLength,
            int minLength,
            bool filterMaxLength,
            int maxLength)
        {
            return(CreateTask(() =>
            {
                string searchResults;
                string[] accessions;
                List <NuccoreObject> nuccoreObjects;

                // Build the search term with whichever length limits were requested.
                if (filterMinLength)
                {
                    searchResults = filterMaxLength ?
                                    NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength, maxLength: maxLength) :
                                    NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength);
                }
                else
                {
                    // Only an upper bound was requested: a minimum length of 1 is passed explicitly.
                    searchResults = filterMaxLength ?
                                    NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength: 1, maxLength: maxLength) :
                                    NcbiHelper.FormatNcbiSearchTerm(searchQuery);
                }
                nuccoreObjects = NcbiHelper.ExecuteESummaryRequest(searchResults, importPartial);

                // Keep only the part of each accession before the first '.', de-duplicated.
                accessions = nuccoreObjects.Select(no => no.AccessionVersion.Split('.')[0]).Distinct().ToArray();
                var importResults = new List <MatterImportResult>(accessions.Length);

                using (var db = new LibiadaWebEntities())
                {
                    var matterRepository = new MatterRepository(db);
                    var dnaSequenceRepository = new GeneticSequenceRepository(db);

                    var (existingAccessions, accessionsToImport) = dnaSequenceRepository.SplitAccessionsIntoExistingAndNotImported(accessions);

                    // Accessions that are already stored are reported but not re-imported.
                    importResults.AddRange(existingAccessions.ConvertAll(existingAccession => new MatterImportResult
                    {
                        MatterName = existingAccession,
                        Result = "Sequence already exists",
                        Status = "Exists"
                    }));

                    foreach (string accession in accessionsToImport)
                    {
                        // MatterName is refined as more information becomes available, so that a
                        // failure at any step is still reported under the best known name.
                        var importResult = new MatterImportResult()
                        {
                            MatterName = accession
                        };

                        try
                        {
                            ISequence bioSequence = NcbiHelper.DownloadGenBankSequence(accession);
                            GenBankMetadata metadata = NcbiHelper.GetMetadata(bioSequence);
                            importResult.MatterName = metadata.Version.CompoundAccession;

                            Matter matter = matterRepository.CreateMatterFromGenBankMetadata(metadata);

                            importResult.SequenceType = matter.SequenceType.GetDisplayValue();
                            importResult.Group = matter.Group.GetDisplayValue();
                            importResult.MatterName = matter.Name;
                            importResult.AllNames = $"Common name = {metadata.Source.CommonName}, "
                                                    + $"Species = {metadata.Source.Organism.Species}, "
                                                    + $"Definition = {metadata.Definition}, "
                                                    + $"Saved matter name = {importResult.MatterName}";

                            var sequence = new CommonSequence
                            {
                                Matter = matter,
                                Notation = Notation.Nucleotides,
                                RemoteDb = RemoteDb.GenBank,
                                RemoteId = metadata.Version.CompoundAccession
                            };

                            // Ordinal, case-insensitive search: the previous ToLower().Contains(...)
                            // depended on the current culture (e.g. Turkish casing of 'I').
                            bool partial = metadata.Definition.IndexOf("partial", StringComparison.OrdinalIgnoreCase) >= 0;
                            dnaSequenceRepository.Create(sequence, bioSequence, partial);

                            (importResult.Result, importResult.Status) = importGenes ?
                                                                         ImportFeatures(metadata, sequence) :
                                                                         ("Successfully imported sequence", "Success");
                        }
                        catch (Exception exception)
                        {
                            // Report the whole chain of exception messages for this accession.
                            importResult.Status = "Error";
                            importResult.Result = $"Error: {exception.Message}";
                            while (exception.InnerException != null)
                            {
                                exception = exception.InnerException;
                                importResult.Result += $" {exception.Message}";
                            }

                            // Detach everything the context is tracking so that the failed
                            // entities do not affect the following imports.
                            foreach (var dbEntityEntry in db.ChangeTracker.Entries())
                            {
                                if (dbEntityEntry.Entity != null)
                                {
                                    dbEntityEntry.State = EntityState.Detached;
                                }
                            }
                        }
                        finally
                        {
                            // One result per accession, whether the import succeeded or failed.
                            importResults.Add(importResult);
                        }
                    }

                    string[] names = importResults.Select(r => r.MatterName).ToArray();

                    // removing matters for which adding of sequence failed
                    Matter[] orphanMatters = db.Matter
                                             .Include(m => m.Sequence)
                                             .Where(m => names.Contains(m.Name) && m.Sequence.Count == 0)
                                             .ToArray();

                    if (orphanMatters.Length > 0)
                    {
                        db.Matter.RemoveRange(orphanMatters);
                        db.SaveChanges();
                    }
                }

                var result = new Dictionary <string, object> {
                    { "result", importResults }
                };

                return new Dictionary <string, string> {
                    { "data", JsonConvert.SerializeObject(result) }
                };
            }));
        }
コード例 #24
0
        /// <summary>
        /// Parses the GenBank file configured under the simple GenBank node of the test XML with
        /// the parser alphabet set to Protein, and validates the first parsed sequence's alphabet,
        /// ID, locus/version metadata and sequence text against the expected values read from
        /// that same node.
        /// </summary>
        public void GenBankParserValidateParseOneWithSpecificFormats()
        {
            InitializeXmlVariables();
            // Initialization of xml strings.
            FilePath = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.FilePathNode);
            AlphabetName = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.AlphabetNameNode);
            SeqId = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.SequenceIdNode);
            StrandTopology = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.StrandTopologyNode);
            StrandType = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.StrandTypeNode);
            Div = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.DivisionNode);
            Version = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.VersionNode);
            SequenceDate = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.DateNode);
            PrimaryId = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.PrimaryIdNode);
            ExpectedSequence = utilityObj.xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.ExpectedSequenceNode);

            // parse
            using (ISequenceParser parserObj = new GenBankParser(FilePath))
            {
                parserObj.Alphabet = Alphabets.Protein;

                // Fetch the first sequence exactly once. Calling ElementAt(0) on the parser's
                // IEnumerable for every assertion enumerates it again each time (and, if the
                // parser is lazy, re-reads the file on every call).
                ISequence firstSequence = parserObj.Parse().ElementAt(0);

                Assert.AreEqual(Utility.GetAlphabet(AlphabetName),
                                firstSequence.Alphabet);
                Assert.AreEqual(SeqId, firstSequence.ID);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

                // test the metadata that is tricky to parse, and will not be tested implicitly by
                // testing the formatting
                GenBankMetadata metadata = (GenBankMetadata)firstSequence.Metadata["GenBank"];

                // The strand type is only compared when the parsed locus reports one.
                if (metadata.Locus.Strand != SequenceStrandType.None)
                {
                    Assert.AreEqual(StrandType,
                                    metadata.Locus.Strand.ToString());
                }
                Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                                metadata.Locus.StrandTopology.ToString().ToUpper(
                                    CultureInfo.CurrentCulture));
                Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
                Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                                metadata.Locus.Date);
                Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
                Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

                // test the sequence string
                Assert.AreEqual(ExpectedSequence, new string(firstSequence.Select(a => (char)a).ToArray()));
                ApplicationLog.WriteLine(
                    "GenBank Parser BVT: Successfully validated the Sequence");
                Console.WriteLine(string.Format((IFormatProvider)null,
                                                "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                                ExpectedSequence));
            }
        }
コード例 #25
0
        /// <summary>
        /// Round-trips a populated <c>GenBankMetadata</c> through <c>BinaryFormatter</c> (via the
        /// file "GenbankMetadata.data") and checks that the deserialized copy matches the original.
        /// </summary>
        /// <remarks>
        /// Exceptions are deliberately not caught here: an unexpected exception or a failed
        /// assertion reaches the test runner with its original message and stack trace instead of
        /// being replaced by a message-less <c>Assert.Fail()</c>.
        /// BinaryFormatter must not be used on untrusted input; this test only reads back data it
        /// has just written itself.
        /// </remarks>
        public void TestGenBankMetadataWithBinaryFormatter()
        {
            LocationBuilder locBuilder = new LocationBuilder();
            using (Stream stream = File.Open("GenbankMetadata.data", FileMode.Create))
            {
                BinaryFormatter formatter = new BinaryFormatter();
                GenBankMetadata metadata  = new GenBankMetadata();
                metadata = AddGenBankMetadata(metadata);
                formatter.Serialize(stream, metadata);

                // Rewind so the same stream can be read back.
                stream.Seek(0, SeekOrigin.Begin);

                GenBankMetadata deserializedMetadta = (GenBankMetadata)formatter.Deserialize(stream);

                Assert.AreEqual(deserializedMetadta.Accession.Primary, metadata.Accession.Primary);

                // Each collection's size is compared before its elements; otherwise a round trip
                // that dropped entries would pass because the loops run over the deserialized
                // (shorter) collection only.
                Assert.AreEqual(metadata.Accession.Secondary.Count, deserializedMetadta.Accession.Secondary.Count);
                for (int i = 0; i < deserializedMetadta.Accession.Secondary.Count; i++)
                {
                    Assert.AreEqual(deserializedMetadta.Accession.Secondary[i], metadata.Accession.Secondary[i]);
                }

                Assert.AreEqual(deserializedMetadta.BaseCount, metadata.BaseCount);

                Assert.AreEqual(metadata.Comments.Count, deserializedMetadta.Comments.Count);
                for (int i = 0; i < deserializedMetadta.Comments.Count; i++)
                {
                    Assert.AreEqual(deserializedMetadta.Comments[i], metadata.Comments[i]);
                }

                Assert.AreEqual(deserializedMetadta.Contig, metadata.Contig);

                Assert.AreEqual(deserializedMetadta.DBLink.Type, metadata.DBLink.Type);

                Assert.AreEqual(metadata.DBLink.Numbers.Count, deserializedMetadta.DBLink.Numbers.Count);
                for (int i = 0; i < deserializedMetadta.DBLink.Numbers.Count; i++)
                {
                    Assert.AreEqual(deserializedMetadta.DBLink.Numbers[i], metadata.DBLink.Numbers[i]);
                }

                Assert.AreEqual(deserializedMetadta.DBSource, metadata.DBSource);
                Assert.AreEqual(deserializedMetadta.Definition, metadata.Definition);

                Assert.AreEqual(metadata.Features.All.Count, deserializedMetadta.Features.All.Count);
                for (int i = 0; i < deserializedMetadta.Features.All.Count; i++)
                {
                    FeatureItem deserializedFeature = deserializedMetadta.Features.All[i];
                    FeatureItem originalFeature     = metadata.Features.All[i];

                    Assert.AreEqual(deserializedFeature.Key, originalFeature.Key);
                    Assert.AreEqual(locBuilder.GetLocationString(deserializedFeature.Location), locBuilder.GetLocationString(originalFeature.Location));

                    Assert.AreEqual(originalFeature.Qualifiers.Count, deserializedFeature.Qualifiers.Count);
                    foreach (KeyValuePair <string, List <string> > kvp in deserializedFeature.Qualifiers)
                    {
                        List <string> originalValues;
                        if (!originalFeature.Qualifiers.TryGetValue(kvp.Key, out originalValues))
                        {
                            Assert.Fail("Qualifier '" + kvp.Key + "' is missing from the original metadata.");
                        }

                        if (kvp.Value == null)
                        {
                            Assert.IsNull(originalValues);
                        }
                        else
                        {
                            Assert.IsNotNull(originalValues);
                            Assert.AreEqual(originalValues.Count, kvp.Value.Count);
                            for (int j = 0; j < kvp.Value.Count; j++)
                            {
                                Assert.AreEqual(kvp.Value[j], originalValues[j]);
                            }
                        }
                    }
                }

                ValidateDeserializedMetadata(deserializedMetadta, metadata);
            }
        }
コード例 #26
0
        /// <summary>
        /// Reads the expected values for the simple GenBank file from the test XML, parses that
        /// file as a single sequence with a GenBank parser set to the Protein alphabet and the
        /// NcbiEAA encoding, and checks the parsed sequence's alphabet, molecule type, IDs,
        /// locus/version metadata and sequence text against those expected values.
        /// </summary>
        public void GenBankParserValidateParseOneWithSpecificFormats()
        {
            InitializeXmlVariables();

            // Load every expected value from the simple GenBank node of the test XML.
            FilePath = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.FilePathNode);
            AlphabetName = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.AlphabetNameNode);
            MolType = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.MoleculeTypeNode);
            IsSequenceReadOnly = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.IsReadOnlyNode);
            SeqId = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.SequenceIdNode);
            StrandTopology = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.StrandTopologyNode);
            StrandType = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.StrandTypeNode);
            Div = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.DivisionNode);
            Version = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.VersionNode);
            SequenceDate = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.DateNode);
            PrimaryId = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.PrimaryIdNode);
            ExpectedSequence = _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, Constants.ExpectedSequenceNode);

            // Configure the parser and read one sequence from the file.
            BasicSequenceParser genBankParser = new GenBankParser();
            genBankParser.Alphabet = Alphabets.Protein;
            genBankParser.Encoding = NcbiEAAEncoding.Instance;
            ISequence parsed = genBankParser.ParseOne(FilePath);

            // Basic sequence properties.
            Assert.AreEqual(Utility.GetAlphabet(AlphabetName), parsed.Alphabet);
            Assert.AreEqual(Utility.GetMoleculeType(MolType), parsed.MoleculeType);
            Assert.AreEqual(SeqId, parsed.DisplayID);
            Assert.AreEqual(SeqId, parsed.ID);
            ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // Metadata that is awkward to parse and is not covered implicitly by the
            // formatting tests.
            GenBankMetadata genBankInfo = (GenBankMetadata)parsed.Metadata["GenBank"];

            // The strand type is only compared when the locus actually reports one.
            if (genBankInfo.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType, genBankInfo.Locus.Strand.ToString());
            }

            var expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
            var parsedTopology = genBankInfo.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
            Assert.AreEqual(expectedTopology, parsedTopology);

            Assert.AreEqual(Div, genBankInfo.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankInfo.Locus.Date);
            Assert.AreEqual(Version, genBankInfo.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, genBankInfo.Version.GINumber);
            ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // Finally compare the sequence text itself.
            Assert.AreEqual(ExpectedSequence, parsed.ToString());
            ApplicationLog.WriteLine("GenBank Parser BVT: Successfully validated the Sequence");

            string sequenceMessage = string.Format(
                (IFormatProvider)null,
                "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                ExpectedSequence);
            Console.WriteLine(sequenceMessage);
        }
コード例 #27
0
        /// <summary>
        /// Fills the given GenBank metadata object with fixed sample values (accession, comments,
        /// DB link, three features, locus, project, two references, segment, source and version).
        /// </summary>
        /// <param name="metadata">GenBank metadata instance to populate.</param>
        /// <returns>The same <paramref name="metadata"/> instance, populated.</returns>
        private static GenBankMetadata AddGenBankMetadata(GenBankMetadata metadata)
        {
            // Accession
            GenBankAccession accession = new GenBankAccession { Primary = "PAccession" };
            metadata.Accession = accession;
            accession.Secondary.Add("SAccession1");
            accession.Secondary.Add("SAccession2");

            metadata.BaseCount = "a 1 c 2";
            metadata.Comments.Add("Comment1");
            metadata.Comments.Add("Comment2");
            metadata.Contig = "Contig Info";

            // Cross-reference link
            CrossReferenceLink dbLink = new CrossReferenceLink { Type = CrossReferenceType.Project };
            metadata.DBLink = dbLink;
            dbLink.Numbers.Add("100");
            dbLink.Numbers.Add("200");

            metadata.DBSource   = "DbSourceInfo";
            metadata.Definition = "Defination info";

            // Features: one "feature1" item followed by two identical "feature2" items.
            SequenceFeatures features = new SequenceFeatures();
            metadata.Features = features;

            FeatureItem firstFeature = new FeatureItem("feature1", "1");
            firstFeature.Qualifiers.Add("qualifier1", new List <string> { "qualifier1value1", "qualifier1value2" });
            features.All.Add(firstFeature);

            for (int copy = 0; copy < 2; copy++)
            {
                FeatureItem secondFeature = new FeatureItem("feature2", "2");
                secondFeature.Qualifiers.Add("qualifier2", new List <string> { "qualifier2value1", "qualifier2value2" });
                features.All.Add(secondFeature);
            }

            metadata.Keywords = "keywords data";

            // Locus
            metadata.Locus = new GenBankLocusInfo
            {
                Date           = DateTime.Now,
                DivisionCode   = SequenceDivisionCode.CON,
                MoleculeType   = MoleculeType.DNA,
                Name           = "LocusName",
                SequenceLength = 100,
                SequenceType   = "bp",
                Strand         = SequenceStrandType.Double,
                StrandTopology = SequenceStrandTopology.Linear
            };

            metadata.Origin  = "origin info";
            metadata.Primary = "Primary info";

            // Project
            ProjectIdentifier project = new ProjectIdentifier { Name = "Project1" };
            metadata.Project = project;
            project.Numbers.Add("101");
            project.Numbers.Add("201");

            // References: the two entries differ only in Location and Number.
            metadata.References.Add(new CitationReference
            {
                Authors     = "Authors",
                Consortiums = "Consortiums",
                Journal     = "Journal",
                Location    = "3",
                Medline     = "Medline info",
                Number      = 1,
                PubMed      = "pubmid",
                Remarks     = "remarks",
                Title       = "Title of the book"
            });
            metadata.References.Add(new CitationReference
            {
                Authors     = "Authors",
                Consortiums = "Consortiums",
                Journal     = "Journal",
                Location    = "4",
                Medline     = "Medline info",
                Number      = 2,
                PubMed      = "pubmid",
                Remarks     = "remarks",
                Title       = "Title of the book"
            });

            // Segment
            metadata.Segment = new SequenceSegment { Count = 2, Current = 1 };

            // Source
            SequenceSource source = new SequenceSource { CommonName = "ABC Xyz" };
            metadata.Source = source;
            source.Organism.Genus       = "ABC";
            source.Organism.Species     = "Xyz";
            source.Organism.ClassLevels = "123 123";

            // Version
            metadata.Version = new GenBankVersion
            {
                Accession = "PAccession",
                Version   = "1",
                GINumber  = "12345"
            };

            return metadata;
        }
コード例 #28
0
        /// <summary>
        /// Method called when the user clicks Ok button on the export selection dialog.
        /// Builds one sequence (plus formatter-specific metadata) per selected item and hands the
        /// resulting list to the <c>inputSequenceSelectionComplete</c> callback.
        /// In case there was an error parsing, the failing item is flagged, the error message is
        /// shown and the selection dialog is displayed again.
        /// </summary>
        /// <param name="dialog">
        /// Dialog which raised this event; it is used as an <c>ExportSelectionDialog</c>.
        /// </param>
        private void OnExportSequenceDialogSubmit(ISelectionDialog dialog)
        {
            ExportSelectionDialog    selectionDialog = dialog as ExportSelectionDialog;
            List <ISequence>         parsedSequences = new List <ISequence>();
            List <Range>             rangesInCurrentSequenceItem;
            List <InputSequenceItem> sequenceItems = selectionDialog.GetSequences();

            // The formatter chosen for the export decides which kind of metadata is parsed below.
            ISequenceFormatter       formatterUsed = argsForCallback[0] as ISequenceFormatter;

            try
            {
                foreach (InputSequenceItem currentSequenceItem in sequenceItems)
                {
                    try
                    {
                        ISequence sequenceForCurrentItem = null;

                        // Parse sequence
                        if (formatterUsed is GffFormatter && string.IsNullOrWhiteSpace(currentSequenceItem.SequenceAddress))
                        {
                            // For GFF an item without a sequence address gets an empty DNA sequence.
                            sequenceForCurrentItem = new Sequence(Alphabets.DNA, "");
                        }
                        else
                        {
                            rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.SequenceAddress);

                            if (rangesInCurrentSequenceItem.Count > 0)
                            {
                                // get from cache with default UI options.
                                sequenceForCurrentItem = SequenceCache.TryGetSequence(rangesInCurrentSequenceItem, selectionDialog.InputParamsAsKey) as ISequence;
                                if (sequenceForCurrentItem == null) // if not in cache
                                {
                                    sequenceForCurrentItem = ExcelSelectionParser.RangeToSequence(rangesInCurrentSequenceItem, selectionDialog.TreatBlankCellsAsGaps, selectionDialog.MoleculeType, currentSequenceItem.SequenceName);
                                    //added default from UI as auto detect and ignore space
                                    SequenceCache.Add(rangesInCurrentSequenceItem, sequenceForCurrentItem, selectionDialog.InputParamsAsKey);
                                }
                                else
                                {
                                    // Set the ID
                                    sequenceForCurrentItem = SetSequenceID(sequenceForCurrentItem, currentSequenceItem.SequenceName);
                                }
                            }
                            else
                            {
                                currentSequenceItem.SetErrorStatus(false);
                            }
                        }
                        //Parse metadata
                        if (formatterUsed is Bio.IO.FastQ.FastQFormatter)
                        {
                            rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                            if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                            {
                                sequenceForCurrentItem = ExcelSelectionParser.RangeToQualitativeSequence(rangesInCurrentSequenceItem, sequenceForCurrentItem);
                            }
                        }
                        else if (formatterUsed is GenBankFormatter)
                        {
                            rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                            if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                            {
                                try
                                {
                                    GenBankMetadata metadata = ExcelSelectionParser.RangeToGenBankMetadata(rangesInCurrentSequenceItem);
                                    sequenceForCurrentItem.Metadata[Helper.GenBankMetadataKey] = metadata;
                                    if (string.IsNullOrEmpty(sequenceForCurrentItem.ID))
                                    {
                                        // Set the ID
                                        sequenceForCurrentItem = SetSequenceID(sequenceForCurrentItem, metadata.Locus.Name);
                                    }
                                }
                                catch (Exception metadataError)
                                {
                                    // Same exception type and user-facing message as before, but the
                                    // underlying cause is kept as InnerException instead of being lost.
                                    throw new Exception(Properties.Resources.GenbankMetadataParseError, metadataError);
                                }
                            }
                        }
                        else if (formatterUsed is GffFormatter)
                        {
                            rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                            if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                            {
                                ExcelSelectionParser.RangeToGffMetadata(sequenceForCurrentItem, rangesInCurrentSequenceItem);
                            }
                        }

                        // Add the parsed sequence to the list of parsed sequences
                        // NOTE(review): when the item had no ranges, sequenceForCurrentItem is still
                        // null here and a null entry is added - confirm the callback expects that.
                        parsedSequences.Add(sequenceForCurrentItem);
                    }
                    catch
                    {
                        // Set error status on item and re-throw
                        currentSequenceItem.SetErrorStatus(true);
                        throw;
                    }
                }

                // On successful parsing...
                if (inputSequenceSelectionComplete != null)
                {
                    inputSequenceSelectionComplete(parsedSequences, this.argsForCallback);
                }

                // NOTE(review): this removes OnInputSequenceDialogSubmit, not this handler
                // (OnExportSequenceDialogSubmit) - verify against the place where the export
                // dialog's event is subscribed.
                selectionDialog.InputSelectionDialogSubmitting -= OnInputSequenceDialogSubmit;
                selectionDialog.Close();
            }
            catch (Exception ex)
            {
                // Report the problem and let the user correct the selection.
                MessageBox.Show(ex.Message, Resources.CAPTION, MessageBoxButtons.OK, MessageBoxIcon.Error);
                selectionDialog.ShowDialog();
            }
        }
コード例 #29
0
ファイル: Pivot.cs プロジェクト: vforget/blip
 /// <summary>
 /// Returns the first value of the qualifier <paramref name="qualifierKey"/> found on a feature
 /// whose key equals <paramref name="featureItemKey"/>, with leading and trailing double
 /// quotes removed.
 /// </summary>
 /// <param name="featureItemKey">Feature key to look for (compared with <c>==</c>).</param>
 /// <param name="qualifierKey">Qualifier key to look for within matching features.</param>
 /// <param name="gbMeta">GenBank metadata whose features are searched.</param>
 /// <returns>
 /// The trimmed first qualifier value, or "N/A" when no matching feature/qualifier with at
 /// least one non-null value exists.
 /// </returns>
 public static string GetQualifierString(string featureItemKey, string qualifierKey, GenBankMetadata gbMeta)
 {
     Console.WriteLine("In GetQualifierString: " + featureItemKey + " " + qualifierKey);

     // Defensive: metadata without a feature table simply has no match.
     if (gbMeta.Features == null)
     {
         return "N/A";
     }

     foreach (FeatureItem fi in gbMeta.Features.All)
     {
         if (fi.Key != featureItemKey)
         {
             continue;
         }

         foreach (KeyValuePair<string, List<string>> q in fi.Qualifiers)
         {
             // A qualifier may carry no values; indexing [0] unconditionally used to throw
             // here even for qualifiers that were not the one being requested.
             string firstValue = (q.Value != null && q.Value.Count > 0) ? q.Value[0] : null;

             Console.WriteLine(featureItemKey + " " + q.Key + " " + firstValue);
             if (q.Key == qualifierKey && firstValue != null)
             {
                 return firstValue.Trim('"');
             }
         }
     }
     return "N/A";
 }