Пример #1
0
        public void GenBankFeatures()
        {
            // parse
            ISequenceParser       parser   = new GenBankParser();
            ISequence             seq      = parser.ParseOne(_singleProteinSeqGenBankFilename);
            GenBankMetadata       metadata = seq.Metadata["GenBank"] as GenBankMetadata;
            List <CodingSequence> CDS      = metadata.Features.CodingSequences;

            Assert.AreEqual(CDS.Count, 3);
            Assert.AreEqual(CDS[0].DatabaseCrossReference.Count, 1);
            Assert.AreEqual(CDS[0].GeneSymbol, string.Empty);
            Assert.AreEqual(metadata.Features.GetFeatures("source").Count, 1);
            Assert.IsFalse(CDS[0].Pseudo);
            Assert.AreEqual(metadata.GetFeatures(1, 109).Count, 2);
            Assert.AreEqual(metadata.GetFeatures(1, 10).Count, 2);
            Assert.AreEqual(metadata.GetFeatures(10, 100).Count, 2);
            Assert.AreEqual(metadata.GetFeatures(120, 150).Count, 2);
            Assert.AreEqual(metadata.GetCitationsReferredInFeatures().Count, 0);
            seq      = parser.ParseOne(_genBankDataPath + @"\NC_001284.gbk");
            metadata = seq.Metadata["GenBank"] as GenBankMetadata;
            Assert.AreEqual(metadata.Features.All.Count, 743);
            Assert.AreEqual(metadata.Features.CodingSequences.Count, 117);
            Assert.AreEqual(metadata.Features.Exons.Count, 32);
            Assert.AreEqual(metadata.Features.Introns.Count, 22);
            Assert.AreEqual(metadata.Features.Genes.Count, 60);
            Assert.AreEqual(metadata.Features.MiscFeatures.Count, 455);
            Assert.AreEqual(metadata.Features.Promoters.Count, 17);
            Assert.AreEqual(metadata.Features.TransferRNAs.Count, 21);
            Assert.AreEqual(metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count, 117);
            Assert.AreEqual(metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count, 117);
            Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), metadata.Features.CodingSequences[0].GetTranslation().ToString());
            Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
        }
Пример #2
0
        public void GenBankFeatureClone()
        {
            ISequenceParser parser   = new GenBankParser();
            ISequence       seq      = parser.ParseOne(@"testdata\GenBank\NC_001284.gbk");
            GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;

            Assert.AreEqual(metadata.Features.All.Count, 743);
            Assert.AreEqual(metadata.Features.CodingSequences.Count, 117);
            Assert.AreEqual(metadata.Features.Exons.Count, 32);
            Assert.AreEqual(metadata.Features.Introns.Count, 22);
            Assert.AreEqual(metadata.Features.Genes.Count, 60);
            Assert.AreEqual(metadata.Features.MiscFeatures.Count, 455);
            Assert.AreEqual(metadata.Features.Promoters.Count, 17);
            Assert.AreEqual(metadata.Features.TransferRNAs.Count, 21);
            Assert.AreEqual(metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count, 117);
            Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), metadata.Features.CodingSequences[0].GetTranslation().ToString());
            Assert.AreEqual(metadata.GetFeatures(11918, 12241).Count, 2);
            GenBankMetadata clonedMetadata = metadata.Clone();

            Assert.AreEqual(clonedMetadata.Features.All.Count, 743);
            Assert.AreEqual(clonedMetadata.Features.CodingSequences.Count, 117);
            Assert.AreEqual(clonedMetadata.Features.Exons.Count, 32);
            Assert.AreEqual(clonedMetadata.Features.Introns.Count, 22);
            Assert.AreEqual(clonedMetadata.Features.Genes.Count, 60);
            Assert.AreEqual(clonedMetadata.Features.MiscFeatures.Count, 455);
            Assert.AreEqual(clonedMetadata.Features.Promoters.Count, 17);
            Assert.AreEqual(clonedMetadata.Features.TransferRNAs.Count, 21);
            Assert.AreEqual(clonedMetadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count, 117);
            Assert.AreEqual(clonedMetadata.Features.CodingSequences[0].Translation.Trim('"'), clonedMetadata.Features.CodingSequences[0].GetTranslation().ToString());
            Assert.AreEqual(clonedMetadata.GetFeatures(11918, 12241).Count, 2);
        }
Пример #3
0
        public void TestGenBankFailureWhenParsingEmpty()
        {
            ISequenceParser parser = new GenBankParser();
            bool            failed = false;

            // test ParseOne
            try
            {
                ISequence seq = parser.ParseOne(new StringReader(string.Empty));
                failed = true;
            }
            catch (Exception)
            {
                // all is well with the world
            }
            if (failed)
            {
                Assert.Fail("Failed to throw exception for calling ParseOne on reader containing empty string.");
            }

            // test Parse
            try
            {
                IList <ISequence> seqList = parser.Parse(new StringReader(string.Empty));
                failed = true;
            }
            catch (Exception)
            {
                // all is well with the world
            }
            if (failed)
            {
                Assert.Fail("Failed to throw exception for calling Parse on reader containing empty string.");
            }
        }
Пример #4
0
        /// <summary>
        /// Extracts sequence from genbank file.
        /// </summary>
        /// <param name="id">
        /// Accession id of the sequence in ncbi (remote id).
        /// </param>
        /// <returns>
        /// The <see cref="Stream"/>.
        /// </returns>
        public static ISequence DownloadGenBankSequence(string id)
        {
            ISequenceParser parser     = new GenBankParser();
            string          url        = GetEfetchParamsString("gbwithparts") + id;
            Stream          dataStream = GetResponseStream(url);

            return(parser.ParseOne(dataStream));
        }
Пример #5
0
        public void TestGenBankWhenParsingOneOfMany()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence       seq    = parser.ParseOne(_multipleSeqGenBankFilename);

            Assert.IsNotNull(seq);
        }
Пример #6
0
        public void TestGenBankLocusTokenParser()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence       seq    = parser.ParseOne(_genBankFile_LocusTokenParserTest);

            Assert.IsNotNull(seq);
        }
Пример #7
0
        public void TestGenBankFeaturesWithBinaryFormatter()
        {
            Stream stream = null;

            try
            {
                stream = File.Open("GenbankMetadata.data", FileMode.Create);
                BinaryFormatter formatter = new BinaryFormatter();
                ISequenceParser parser    = new GenBankParser();
                ISequence       seq       = parser.ParseOne(@"testdata\GenBank\NC_001284.gbk");
                GenBankMetadata metadata  = seq.Metadata["GenBank"] as GenBankMetadata;
                Assert.AreEqual(metadata.Features.All.Count, 743);
                Assert.AreEqual(metadata.Features.CodingSequences.Count, 117);
                Assert.AreEqual(metadata.Features.Exons.Count, 32);
                Assert.AreEqual(metadata.Features.Introns.Count, 22);
                Assert.AreEqual(metadata.Features.Genes.Count, 60);
                Assert.AreEqual(metadata.Features.MiscFeatures.Count, 455);
                Assert.AreEqual(metadata.Features.Promoters.Count, 17);
                Assert.AreEqual(metadata.Features.TransferRNAs.Count, 21);
                Assert.AreEqual(metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count, 117);
                Assert.AreEqual(metadata.Features.CodingSequences[0].Translation.Trim('"'), metadata.Features.CodingSequences[0].GetTranslation().ToString());
                Assert.AreEqual(metadata.GetFeatures(11918, 12241).Count, 2);
                formatter.Serialize(stream, metadata);
                stream.Seek(0, SeekOrigin.Begin);
                GenBankMetadata deserializedMetadata = (GenBankMetadata)formatter.Deserialize(stream);
                Assert.AreNotSame(metadata, deserializedMetadata);
                Assert.AreEqual(deserializedMetadata.Features.All.Count, 743);
                Assert.AreEqual(deserializedMetadata.Features.CodingSequences.Count, 117);
                Assert.AreEqual(deserializedMetadata.Features.Exons.Count, 32);
                Assert.AreEqual(deserializedMetadata.Features.Introns.Count, 22);
                Assert.AreEqual(deserializedMetadata.Features.Genes.Count, 60);
                Assert.AreEqual(deserializedMetadata.Features.MiscFeatures.Count, 455);
                Assert.AreEqual(deserializedMetadata.Features.Promoters.Count, 17);
                Assert.AreEqual(deserializedMetadata.Features.TransferRNAs.Count, 21);
                Assert.AreEqual(deserializedMetadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count, 117);
                Assert.AreEqual(deserializedMetadata.Features.CodingSequences[0].Translation.Trim('"'), metadata.Features.CodingSequences[0].GetTranslation().ToString());
                Assert.AreEqual(deserializedMetadata.GetFeatures(11918, 12241).Count, 2);
            }
            catch
            {
                Assert.Fail();
            }
            finally
            {
                if (stream != null)
                {
                    stream.Close();
                    stream = null;
                }
            }
        }
Пример #8
0
        public void TestGenBankWhenParsingOne()
        {
            // parse
            ISequenceParser parser = new GenBankParser();
            ISequence       seq    = parser.ParseOne(_singleProteinSeqGenBankFilename);

            // test the non-metadata properties
            Assert.IsTrue(seq.IsReadOnly);
            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
            Assert.AreEqual(MoleculeType.DNA, seq.MoleculeType);
            Assert.AreEqual("SCU49845", seq.DisplayID);
            Assert.AreEqual("SCU49845", seq.ID);

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            Assert.AreEqual(metadata.Locus.Strand, SequenceStrandType.None);
            Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLower());
            Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse("21-JUN-1999"), metadata.Locus.Date);
            Assert.AreEqual("1", metadata.Version.Version);
            Assert.AreEqual("1293613", metadata.Version.GINumber);

            // test that we're correctly putting all types of metadata in the right places
            Assert.AreEqual(1, seq.Metadata.Count);
            IList <CitationReference> referenceList = metadata.References;

            Assert.AreEqual(3, referenceList.Count);
            IList <FeatureItem> featureList = metadata.Features.All;

            Assert.AreEqual(6, featureList.Count);
            Assert.AreEqual(4, featureList[0].Qualifiers.Count);
            Assert.AreEqual(5, featureList[1].Qualifiers.Count);
            Assert.AreEqual(1, featureList[2].Qualifiers.Count);

            // test the sequence string
            string expected = @"GATCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCCGACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCTGCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGAACCGCCAATAGACAACATATGTAACATATTTAGGATATACCTCGAAAATAATAAACCGCCACACTGTCATTATTATAATTAGAAACAGAACGCAAAAATTATCCACTATATAATTCAAAGACGCGAAAAAAAAAGAACAACGCGTCATAGAACTTTTGGCAATTCGCGTCACAAATAAATTTTGGCAACTTATGTTTCCTCTTCGAGCAGTACTCGAGCCCTGTCTCAAGAATGTAATAATACCCATCGTAGGTATGGTTAAAGATAGCATCTCCACAACCTCAAAGCTCCTTGCCGAGAGTCGCCCTCCTTTGTCGAGTAATTTTCACTTTTCATATGAGAACTTATTTTCTTATTCTTTACTCTCACATCCTGTAGTGATTGACACTGCAACAGCCACCATCACTAGAAGAACAGAACAATTACTTAATAGAAAAATTATATCTTCCTCGAAACGATTTCCTGCTTCCAACATCTACGTATATCAAGAAGCATTCACTTACCATGACACAGCTTCAGATTTCATTATTGCTGACAGCTACTATATCACTACTCCATCTAGTAGTGGCCACGCCCTATGAGGCATATCCTATCGGAAAACAATACCCCCCAGTGGCAAGAGTCAATGAATCGTTTACATTTCAAATTTCCAATGATACCTATAAATCGTCTGTAGACAAGACAGCTCAAATAACATACAATTGCTTCGACTTACCGAGCTGGCTTTCGTTTGACTCTAGTTCTAGAACGTTCTCAGGTGAACCTTCTTCTGACTTACTATCTGATGCGAACACCACGTTGTATTTCAATGTAATACTCGAGGGTACGGACTCTGCCGACAGCACGTCTTTGAACAATACATACCAATTTGTTGTTACAAACCGTCCATCCATCTCGCTATCGTCAGATTTCAATCTATTGGCGTTGTTAAAAAACTATGGTTATACTAACGGCAAAAACGCTCTGAAACTAGATCCTAATGAAGTCTTCAACGTGACTTTTGACCGTTCAATGTTCACTAACGAAGAATCCATTGTGTCGTATTACGGACGTTCTCAGTTGTATAATGCGCCGTTACCCAATTGGCTGTTCTTCGATTCTGGCGAGTTGAAGTTTACTGGGACGGCACCGGTGATAAACTCGGCGATTGCTCCAGAAACAAGCTACAGTTTTGTCATCATCGCTACAGACATTGAAGGATTTTCTGCCGTTGAGGTAGAATTCGAATTAGTCATCGGGGCTCACCAGTTAACTACCTCTATTCAAAATAGTTTGATAATCAACGTTACTGACACAGGTAACGTTTCATATGACTTACCTCTAAACTATGTTTATCTCGATGACGATCCTATTTCTTCTGATAAATTGGGTTCTATAAACTTATTGGATGCTCCAGACTGGGTGGCATTAGATAATGCTACCATTTCCGGGTCTGTCCCAGATGAATTACTCGGTAAGAACTCCAATCCTGCCAATTTTTCTGTGTCCATTTATGATACTTATGGTGATGTGATTTATTTCAACTTCGAAGTTGTCTCCACAACGGATTTGTTTGCCATTAGTTCTCTTCCCAATATTAACGCTACAAGGGGTGAATGGTTCTCCTACTATTTTTTGCCTTCTCAGTTTACAGACTACGTGAATACAAACGTTTCATTAGAGTTTACTAATTCAAGCCAAGACCATGACTGGGTGAAATTCCAATCATCTAATTTAACATTAGCTGGAGAAGTGCCCAAGAATTTCGACAAGCTTTCATTAGGTTTGAAAGCGAACCAAGGTTCACAATCTCAAGAGCTATATTTTAACATCATTGGCATGGATTCAAAGATAACTCACTCAAACCACAGTGCGAATGCAACGTCCACAAGAAGTTCTCACCACTCCACCTCAACAAGTTCTTACACATCTTCTACTTACACTGCAAAAATTTCTTCTACCTCCGCTGCTGCTACTTCTTCTGCTCCAGCAGCGCTGCCAGCAGCCAATAAAACTTCATCTCACAATAAAAAAGCAGTAGCAATTGCGTGCGGTGTTGCTATCCCATTAGGCGTTATCCTAGTAGCTCTCATTTGCTTCCTAATATTCTGGAGACGCAGAAGGGAAAATCCAGACGATGAAAACTTACCGCATGCTATTAGTGGACCTGATTTGAATAATCCTGCAAATAAACCAAATCAAGAAAACGCTACACCTTTGAACAACCCCTTTGATGATGATGCTTCCTCGTACGATGATACTTCAATAGCAAGAAGATTGGCTGCTTTGAACACTTTGAAATTGGATAACCACTCTGCCACTGAATCTGATATTTCCAGCGTGGATGAAAAGAGAGATTCTCTATCAGGTATGAATACATACAATGATCAGTTCCAATCCCAAAGTAAAGAAGAATTATTAGCAAAACCCCCAGTACAGCCTCCAGAGAGCCCGTTCTTTGACCCACAGAATAGGTCTTCTTCTGTGTATATGGATAGTGAACCAGCAGTAAATAAATCCTGGCGATATACTGGCAACCTGTCACCAGTCTCTGATATTGTCAGAGACAGTTACGGATCACAAAAAACTGTTGATACAGAAAAACTTTTCGATTTAGAAGCACCAGAGAAGGAAAAACGTACGTCAAGGGATGTCACTATGTCTTCACTGGACCCTTGGAACAGCAATATTAGCCCTTCTCCCGTAAGAAAATCAGTAACACCATCACCATATAACGTAACGAAGCATCGTAACCGCCACTTACAAAATATTCAAGACTCTCAAAGCGGTAAAAACGGAATCACTCCCACAACAATGTCAACTTCATCTTCTGACGATTTTGTTCCGGTTAAAGATGGTGAAAATTTTTGCTGGGTCCATAGCATGGAACCAGACAGAAGACCAAGTAAGAAAAGGTTAGTAGATTTTTCAAATAAGAGTAATGTCAATGTTGGTCAAGTTAAGGACATTCACGGACGCATCCCAGAAATGCTGTGATTATACGCAACGATATTTTGCTTAATTTTATTTTCCTGTTTTATTTTTTATTAGTGGTTTACAGATACCCTATATTTTATTTAGTTTTTATACTTAGAGACATTTAATTTTAATTCCATTCTTCAAATTTCATTTTTGCACTTAAAACAAAGATCCAAAAATGCTCTCGCCCTCTTCATATTGAGAATACACTCCATTCAAAATTTTGTCGTCACCGCTGATTAATTTTTCACTAAACTGATGAATAATCAAAGGCCCCACGTCAGAACCGACTAAAGAAGTGAGTTTTATTTTAGGAGGTTGAAAACCATTATTGTCTGGTAAATTTTCATCTTCTTGACATTTAACCCAGTTTGAATCCCTTTCAATTTCTGCTTTTTCCTCCAAACTATCGACCCTCCTGTTTCTGTCCAACTTATGTCCTAGTTCCAATTCGATCGCATTAATAACTGCTTCAAATGTTATTGTGTCATCGTTGACTTTAGGTAATTTCTCCAAATGCATAATCAAACTATTTAAGGAAGATCGGAATTCGTCGAACACTTCAGTTTCCGTAATGATCTGATCGTCTTTATCCACATGTTGTAATTCACTAAAATCTAAAACGTATTTTTCAATGCATAAATCGTTCTTTTTATTAATAATGCAGATGGAAAATCTGTAAACGTGCGTTAATTTAGAAAGAACATCCAGTATAAGTTCTTCTATATAGTCAATTAAAGCAGGATGCCTATTAATGGGAACGAACTGCGGCAAGTTGAATGACTGGTAAGTAGTGTAGTCGAATGACTGAGGTGGGTATACATTTCTATAAAATAAAATCAAATTAATGTAGCATTTTAAGTATACCCTCAGCCACTTCTCTACCCATCTATTCATAAAGCTGACGCAACGATTACTATTTTTTTTTTCTTCTTGGATCTCAGTCGTCGCAAAAACGTATACCTTCTTTTTCCGACCTTTTTTTTAGCTTTCTGGAAAAGTTTATATTAGTTAAACAGGGTCTAGTCTTAGTGTGAAAGCTAGTGGTTTCGATTGACTGATATTAAGAAAGTGGAAATTAAATTAGTAGTGTAGACGTATATGCATATGTATTTCTCGCCTGTTTATGTTTCTACGTACTTTTGATTTATAGCAAGGGGAAAAGAAATACATACTATTTTTTGGTAAAGGTGAAAGCATAATGTAAAAGCTAGAATAAAATGGACGAAATAAAGAGAGGCTTAGTTCATCTTTTTTCCAAAAAGCACCCAATGATAATAACTAAAATGAAAAGGATTTGCCATCTGTCAGCAACATCAGTTGTGTGAGCAATAATAAAATCATCACCTCCGTTGCCTTTAGCGCGTTTGTCGTTTGTATCTTCCGTAATTTTAGTCTTATCAATGGGAATCATAAATTTTCCAATGAATTAGCAATTTCGTCCAATTCTTTTTGAGCTTCTTCATATTTGCTTTGGAATTCTTCGCACTTCTTTTCCCATTCATCTCTTTCTTCTTCCAAAGCAACGATCCTTCTACCCATTTGCTCAGAGTTCAAATCGGCCTCTTTCAGTTTATCCATTGCTTCCTTCAGTTTGGCTTCACTGTCTTCTAGCTGTTGTTCTAGATCCTGGTTTTTCTTGGTGTAGTTCTCATTATTAGATCTCAAGTTATTGGAGTCTTCAGCCAATTGCTTTGTATCAGACAATTGACTCTCTAACTTCTCCACTTCACTGTCGAGTTGCTCGTTTTTAGCGGACAAAGATTTAATCTCGTTTTCTTTTTCAGTGTTAGATTGCTCTAATTCTTTGAGCTGTTCTCTCAGCTCCTCATATTTTTCTTGCCATGACTCAGATTCTAATTTTAAGCTATTCAATTTCTCTTTGATC";

            Assert.AreEqual(expected, seq.ToString());

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string             actual    = formatter.FormatString(seq);

            // test the formatting
            Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
        }
Пример #9
0
        public void TestGenBankWhenUserSetsDnaAlphabet()
        {
            // set correct alphabet and parse
            ISequenceParser parser = new GenBankParser();

            parser.Alphabet = Alphabets.DNA;
            ISequence seq = parser.ParseOne(_singleDnaSeqGenBankFilename);

            Assert.AreEqual(Alphabets.DNA, seq.Alphabet);

            // format
            ISequenceFormatter formatter = new GenBankFormatter();
            string             actual    = formatter.FormatString(seq);

            // test the formatting
            Assert.AreEqual(_singleDnaSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
        }
Пример #10
0
        void InvalidateGenBankParser(string node)
        {
            // Initialization of xml strings.
            FilePath = _utilityObj._xmlUtil.GetTextValue(node,
                                                         Constants.FilePathNode);

            try
            {
                GenBankParser parserObj = new GenBankParser();
                if (string.Equals(Constants.SimpleGenBankNodeName, node))
                {
                    parserObj.LocationBuilder = null;
                }
                else if (string.Equals(Constants.SimpleGenBankPrimaryNode, node))
                {
                    parserObj.Alphabet = Alphabets.RNA;
                }

                parserObj.ParseOne(FilePath);
                Assert.Fail();
            }
            catch (InvalidOperationException)
            {
                ApplicationLog.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
                Console.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
            }
            catch (InvalidDataException)
            {
                ApplicationLog.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
                Console.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
            }
            catch (Exception)
            {
                ApplicationLog.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
                Console.WriteLine(
                    "GenBank Parser : Successfully validated the exception:");
            }
        }
Пример #11
0
        public void TestGenBankWhenUserSetsIncorrectAlphabet()
        {
            // parse
            ISequenceParser parser = new GenBankParser();

            parser.Alphabet = Alphabets.Protein;
            bool failed = false;

            try
            {
                ISequence seq = parser.ParseOne(_singleDnaSeqGenBankFilename);
                failed = true;
            }
            catch (Exception)
            {
                // all is well with the world
            }
            if (failed)
            {
                Assert.Fail("Failed to throw exception for trying to create sequence using incorrect alphabet.");
            }
        }
Пример #12
0
        public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
        {
            // BLAST reports are saved in individual files by query and
            // numbered in the same order as they appear in the input FASTA file.
            int[] annotatedIndex = new int[2];
            annotatedIndex[0] = -1;
            annotatedIndex[1] = -1;

            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";

            if (!File.Exists(blastFile))
            {
                throw new Exception("File does not exist.");
            }
            BlastXmlParser      blastParser  = new BlastXmlParser();
            IList <BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser       gbParser     = new GenBankParser();

            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    int hitsProcessed = 0;
                    // If there are not hits in the BLAST result ...
                    int rank = 0;
                    if (record.Hits.Count() > 0)
                    {
                        // For each hit
                        for (int i = 0; i < record.Hits.Count(); i++)
                        {
                            Hit blastHit = record.Hits[i];
                            for (int j = 0; j < blastHit.Hsps.Count(); j++)
                            {
                                Hsp    blastHsp      = blastHit.Hsps[j];
                                double percentId     = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                                double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                                if ((percentId >= Up.BlastMinPercentIdentity) &&
                                    (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                    (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                    (hitsProcessed < Up.BlastMaxNumHits))
                                {
                                    rank += 1;
                                    long        gi     = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                                    GenBankItem gitem  = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                                    string      gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                                    gbFile += "_" + gitem.HitStart.ToString();
                                    gbFile += "_" + gitem.HitEnd.ToString();
                                    gbFile += ".gb";

                                    try
                                    {
                                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        ISequence           gbRecord = gbParser.ParseOne(gbFile);
                                        GenBankMetadata     gbMeta   = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                        IList <FeatureItem> features = gbMeta.Features.All;
                                        FeatureItem         bestItem = getBestFeatureItem(features);
                                        if (bestItem != null)
                                        {
                                            annotatedIndex[0] = i;
                                            annotatedIndex[1] = j;
                                            return(annotatedIndex);
                                        }
                                    }

                                    catch
                                    {
                                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                    }
                                    hitsProcessed += 1;
                                }
                            }
                        }
                    }
                }
            }

            return(annotatedIndex);
        }
Пример #13
0
        public void TestFeatureItem()
        {
            ISequence     seq;
            ISequence     featureSeq       = null;
            GenBankParser parser           = new GenBankParser();
            string        _genBankDataPath = @"TestUtils\GenBank";

            seq = parser.ParseOne(_genBankDataPath + @"\BK000016-tpa.gbk");
            GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;

            #region Test GetSubSequence Method

            featureSeq = metadata.Features.All[0].GetSubSequence(seq);
            int start = metadata.Features.All[0].Location.Start - 1;
            int end   = metadata.Features.All[0].Location.End - start;
            Assert.AreEqual(featureSeq.ToString(), seq.Range(start, end).ToString());
            featureSeq = metadata.Features.All[1].GetSubSequence(seq);
            start      = metadata.Features.All[1].Location.Start - 1;
            end        = metadata.Features.All[1].Location.End - start;
            Assert.AreEqual(featureSeq.ToString(), seq.Range(start, end).ToString());


            seq = new Sequence(Alphabets.DNA, "ACGTAAAGGT");
            Sequence        refSeq     = new Sequence(Alphabets.DNA, "AAAAATTTT");
            LocationBuilder locbuilder = new LocationBuilder();
            ILocation       loc        = locbuilder.GetLocation("join(complement(4..8),Ref1:5..7)");
            Assert.AreEqual("join(complement(4..8),Ref1:5..7)", locbuilder.GetLocationString(loc));
            FeatureItem fi = new FeatureItem("Feature1", loc);
            Dictionary <string, ISequence> refSeqs = new Dictionary <string, ISequence>();
            refSeqs.Add("Ref1", refSeq);
            ISequence result = fi.GetSubSequence(seq, refSeqs);
            Assert.AreEqual("ATTTCATT", result.ToString());
            #endregion

            #region Test GetSubFeatures Method
            SequenceFeatures seqFeatures = new SequenceFeatures();
            FeatureItem      source      = new FeatureItem("Source", "1..1509");
            FeatureItem      mRNA        = new FeatureItem("mRNA", "join(10..567,789..1320)");
            FeatureItem      cds         = new FeatureItem("CDS", "join(54..567,789..1254)");
            FeatureItem      exon1       = new FeatureItem("Exon", "10..567");
            FeatureItem      intron      = new FeatureItem("Intron", "568..788");
            FeatureItem      exon2       = new FeatureItem("Exon", "789..1320");

            seqFeatures.All.Add(source);
            seqFeatures.All.Add(mRNA);
            seqFeatures.All.Add(cds);
            seqFeatures.All.Add(exon1);
            seqFeatures.All.Add(intron);
            seqFeatures.All.Add(exon2);
            List <FeatureItem> subFeatures = source.GetSubFeatures(seqFeatures);
            Assert.AreEqual(5, subFeatures.Count);
            subFeatures = mRNA.GetSubFeatures(seqFeatures);
            Assert.AreEqual(4, subFeatures.Count);
            subFeatures = cds.GetSubFeatures(seqFeatures);
            Assert.AreEqual(1, subFeatures.Count);
            subFeatures = exon1.GetSubFeatures(seqFeatures);
            Assert.AreEqual(0, subFeatures.Count);
            subFeatures = intron.GetSubFeatures(seqFeatures);
            Assert.AreEqual(0, subFeatures.Count);
            subFeatures = exon2.GetSubFeatures(seqFeatures);
            Assert.AreEqual(0, subFeatures.Count);

            #endregion
        }
Пример #14
0
        public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
        {
            
            string queryName = rec.DisplayID.ToString().Split(' ')[0];

            // BLAST reports are saved in individual files by query and 
            // numbered in the same order as they appear in the input FASTA file.
            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
            if (!File.Exists(blastFile))
            {
                throw new Exception("File does not exist.");
            }
            BlastXmlParser blastParser = new BlastXmlParser();
            IList<BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser gbParser = new GenBankParser();

            int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);
            
            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    int hitsProcessed = 0;
                    // If there are not hits in the BLAST result ...
                    int rank = 0;
                    if (record.Hits.Count() > 0)
                    {
                        // For each hit
                        for (int i = 0; i < record.Hits.Count(); i++)
                        {

                            Hit blastHit = record.Hits[i];
                            // For each HSP
                            for (int j = 0; j < blastHit.Hsps.Count(); j++)
                            {
                                
                                Hsp blastHsp = blastHit.Hsps[j];
                                double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                                double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;
                                string txt = String.Format("{0} {1} {2} {3} {4} {5} {6} {7}", percentId, Up.BlastMinPercentIdentity,
                                    Up.BlastMaxEvalue, blastHsp.EValue, queryCoverage, Up.BlastMinPercentQueryCoverage,
                                    hitsProcessed, Up.BlastMaxNumHits);
                                // if HSP passes user-defined thresholds
                                if ((percentId >= Up.BlastMinPercentIdentity) &&
                                    (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                    (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                    (hitsProcessed < Up.BlastMaxNumHits))
                                {
                                    rank += 1;
                                    string nextScore = "no";
                                    if ((i + 1) < record.Hits.Count())
                                    {
                                        if (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score)
                                        {
                                            nextScore = "less than";
                                        }
                                        else
                                        {
                                            nextScore = "equal";
                                        }
                                    }
                                    else
                                    {
                                        nextScore = "non existent";
                                    }

                                    // parse GI numner from hit
                                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                                    gbFile += "_" + gitem.HitStart.ToString();
                                    gbFile += "_" + gitem.HitEnd.ToString();
                                    gbFile += ".gb";
                                    // init item
                                    string img = "#" + itemId.ToString();
                                    Item item = new Item(itemId, img);
                                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                                    item.Name = headerTokens[0];
                                    item.Description = headerTokens[1];

                                    // write pairwise alignment
                                    writePairwiseAlignment(Up, blastHit, j, itemId);

                                    // try to parse the GB record associated with the hit and set facet values to data from BLAST/GB record
                                    try
                                    {
                                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                        CodingSequence bestCds = null;
                                        IList<FeatureItem> features = gbMeta.Features.All;
                                        FeatureItem bestItem = getBestFeatureItem(features);

                                        
                                        if (gbMeta.Features.CodingSequences.Count > 0)
                                        {
                                            bestCds = gbMeta.Features.CodingSequences[0];
                                        }
                                        
                                        for (int k = 1; k < gbMeta.Features.CodingSequences.Count; k++)
                                        {
                                            CodingSequence cds = gbMeta.Features.CodingSequences[k];
                                            //int bestSize = Math.Abs(bestCds.Location.End - bestCds.Location.Start);
                                            int bestSize = Math.Abs(bestItem.Location.End - bestItem.Location.Start);
                                            int cdsSize = Math.Abs(cds.Location.End - cds.Location.Start);
                                            if (cdsSize > bestSize)
                                            {
                                                bestCds = cds;
                                            }
                                        }
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                                case "InputOrder":
                                                    facet = new Facet(f.Name, f.Type, seqPos);
                                                    break;
                                                case "QuerySequence":
                                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                                    break;
                                                case "NextScore":
                                                    facet = new Facet(f.Name, f.Type, nextScore);
                                                    break;
                                                case "Annotated":
                                                    string value = "na";
                                                    if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                    {
                                                        value = "top_annotated";
                                                    }
                                                    else
                                                    {
                                                        if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                        {
                                                            value = "top_unannotated";
                                                        }else{
                                                            if (bestItem != null)
                                                            {
                                                                value = "annotated";
                                                            }else{
                                                                value = "unannotated";
                                                            }
                                                        }
                                                    }
                                                    facet = new Facet(f.Name, f.Type, value);
                                                    break;
                                                default:
                                                    //facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestCds, rank);
                                                    facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                                    break;
                                            }
                                            /*
                                            if (f.Name == "InputOrder")
                                            {
                                                facet = new Facet(f.Name, f.Type, seqPos);
                                            }

                                            else
                                            {
                                                facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item);
                                            }
                                            */
                                            item.Facets.Add(facet);
                                        }

                                    }
                                    //catch (System.NullReferenceException e) // if parsing failed init the item w/ default values (similar to 'no hit' above)
                                    catch
                                    {

                                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        item.Href = "#";
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                                case ("InputOrder"):
                                                    facet = new Facet(f.Name, f.Type, seqPos);
                                                    break;
                                                case "QuerySequence":
                                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                                    break;
                                                case ("NextScore"):
                                                    facet = new Facet(f.Name, f.Type, "no");
                                                    break;
                                                case "Annotated":
                                                    string value = "na";
                                                    if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                    {
                                                        value = "top_annotated";
                                                    }
                                                    else
                                                    {
                                                        if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                        {
                                                            value = "top_unannotated";
                                                        }
                                                        else
                                                        {
                                                            value = "unannotated";
                                                        }
                                                    }
                                                    facet = new Facet(f.Name, f.Type, value);
                                                    break;
                                                default:
                                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                                    break;
                                            }
                                            item.Facets.Add(facet);
                                        }
                                        //throw (e);
                                    }
                                    // Add item to collection, increment to next item, 
                                    collection.Items.Add(item);
                                    hitsProcessed += 1;
                                    itemId += 1;
                                }
                            }
                        }
                    }
                    if ((record.Hits.Count()) == 0 || (hitsProcessed == 0))
                    {
                        // Init Pivot item
                        string img = "#" + itemId.ToString();
                        Item item = new Item(itemId, img);
                        item.Href = "#";
                        string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                        item.Name = headerTokens[0];
                        item.Description = headerTokens[1];

                        // Write pairwise alignment to file.
                        writePairwiseAlignment(Up, itemId);

                        // Set facet values for each facet category to default values
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet = new Facet();
                            switch (f.Name)
                            {
                                case ("InputOrder"):
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case ("QuerySequence"):
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                default:
                                    facet = CreateFacet(f.Name, f.Type, record, item, 0);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }

                        // Add item to collection, increment to next item, skip remaining code
                        collection.Items.Add(item);
                        itemId += 1;
                        hitsProcessed += 1;
                    }
                }
            }
            return itemId;
        }
Пример #15
0
        public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
        {
            string queryName = rec.DisplayID.ToString().Split(' ')[0];

            // BLAST reports are saved in individual files by query and
            // numbered in the same order as they appear in the input FASTA file.
            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";

            if (!File.Exists(blastFile))
            {
                throw new Exception("File does not exist.");
            }
            BlastXmlParser      blastParser  = new BlastXmlParser();
            IList <BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser       gbParser     = new GenBankParser();

            int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    int hitsProcessed = 0;
                    // If there are not hits in the BLAST result ...
                    int rank = 0;
                    if (record.Hits.Count() > 0)
                    {
                        // For each hit
                        for (int i = 0; i < record.Hits.Count(); i++)
                        {
                            Hit blastHit = record.Hits[i];
                            // For each HSP
                            for (int j = 0; j < blastHit.Hsps.Count(); j++)
                            {
                                Hsp    blastHsp      = blastHit.Hsps[j];
                                double percentId     = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                                double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;
                                string txt           = String.Format("{0} {1} {2} {3} {4} {5} {6} {7}", percentId, Up.BlastMinPercentIdentity,
                                                                     Up.BlastMaxEvalue, blastHsp.EValue, queryCoverage, Up.BlastMinPercentQueryCoverage,
                                                                     hitsProcessed, Up.BlastMaxNumHits);
                                // if HSP passes user-defined thresholds
                                if ((percentId >= Up.BlastMinPercentIdentity) &&
                                    (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                    (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                    (hitsProcessed < Up.BlastMaxNumHits))
                                {
                                    rank += 1;
                                    string nextScore = "no";
                                    if ((i + 1) < record.Hits.Count())
                                    {
                                        if (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score)
                                        {
                                            nextScore = "less than";
                                        }
                                        else
                                        {
                                            nextScore = "equal";
                                        }
                                    }
                                    else
                                    {
                                        nextScore = "non existent";
                                    }

                                    // parse GI numner from hit
                                    long        gi     = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                                    GenBankItem gitem  = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                                    string      gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                                    gbFile += "_" + gitem.HitStart.ToString();
                                    gbFile += "_" + gitem.HitEnd.ToString();
                                    gbFile += ".gb";
                                    // init item
                                    string   img          = "#" + itemId.ToString();
                                    Item     item         = new Item(itemId, img);
                                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                                    item.Name        = headerTokens[0];
                                    item.Description = headerTokens[1];

                                    // write pairwise alignment
                                    writePairwiseAlignment(Up, blastHit, j, itemId);

                                    // try to parse the GB record associated with the hit and set facet values to data from BLAST/GB record
                                    try
                                    {
                                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                                        GenBankMetadata     gbMeta   = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                        CodingSequence      bestCds  = null;
                                        IList <FeatureItem> features = gbMeta.Features.All;
                                        FeatureItem         bestItem = getBestFeatureItem(features);


                                        if (gbMeta.Features.CodingSequences.Count > 0)
                                        {
                                            bestCds = gbMeta.Features.CodingSequences[0];
                                        }

                                        for (int k = 1; k < gbMeta.Features.CodingSequences.Count; k++)
                                        {
                                            CodingSequence cds = gbMeta.Features.CodingSequences[k];
                                            //int bestSize = Math.Abs(bestCds.Location.End - bestCds.Location.Start);
                                            int bestSize = Math.Abs(bestItem.Location.End - bestItem.Location.Start);
                                            int cdsSize  = Math.Abs(cds.Location.End - cds.Location.Start);
                                            if (cdsSize > bestSize)
                                            {
                                                bestCds = cds;
                                            }
                                        }
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                            case "InputOrder":
                                                facet = new Facet(f.Name, f.Type, seqPos);
                                                break;

                                            case "QuerySequence":
                                                facet = new Facet(f.Name, f.Type, rec.ToString());
                                                break;

                                            case "NextScore":
                                                facet = new Facet(f.Name, f.Type, nextScore);
                                                break;

                                            case "Annotated":
                                                string value = "na";
                                                if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                {
                                                    value = "top_annotated";
                                                }
                                                else
                                                {
                                                    if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                    {
                                                        value = "top_unannotated";
                                                    }
                                                    else
                                                    {
                                                        if (bestItem != null)
                                                        {
                                                            value = "annotated";
                                                        }
                                                        else
                                                        {
                                                            value = "unannotated";
                                                        }
                                                    }
                                                }
                                                facet = new Facet(f.Name, f.Type, value);
                                                break;

                                            default:
                                                //facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestCds, rank);
                                                facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                                break;
                                            }

                                            /*
                                             * if (f.Name == "InputOrder")
                                             * {
                                             *  facet = new Facet(f.Name, f.Type, seqPos);
                                             * }
                                             *
                                             * else
                                             * {
                                             *  facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item);
                                             * }
                                             */
                                            item.Facets.Add(facet);
                                        }
                                    }
                                    //catch (System.NullReferenceException e) // if parsing failed init the item w/ default values (similar to 'no hit' above)
                                    catch
                                    {
                                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        item.Href = "#";
                                        foreach (FacetCategory f in Up.FacetCategories)
                                        {
                                            Facet facet = new Facet();
                                            switch (f.Name)
                                            {
                                            case ("InputOrder"):
                                                facet = new Facet(f.Name, f.Type, seqPos);
                                                break;

                                            case "QuerySequence":
                                                facet = new Facet(f.Name, f.Type, rec.ToString());
                                                break;

                                            case ("NextScore"):
                                                facet = new Facet(f.Name, f.Type, "no");
                                                break;

                                            case "Annotated":
                                                string value = "na";
                                                if ((annotatedIndex[0] == i) && (annotatedIndex[1] == j))
                                                {
                                                    value = "top_annotated";
                                                }
                                                else
                                                {
                                                    if ((i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
                                                    {
                                                        value = "top_unannotated";
                                                    }
                                                    else
                                                    {
                                                        value = "unannotated";
                                                    }
                                                }
                                                facet = new Facet(f.Name, f.Type, value);
                                                break;

                                            default:
                                                facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                                break;
                                            }
                                            item.Facets.Add(facet);
                                        }
                                        //throw (e);
                                    }
                                    // Add item to collection, increment to next item,
                                    collection.Items.Add(item);
                                    hitsProcessed += 1;
                                    itemId        += 1;
                                }
                            }
                        }
                    }
                    if ((record.Hits.Count()) == 0 || (hitsProcessed == 0))
                    {
                        // Init Pivot item
                        string img  = "#" + itemId.ToString();
                        Item   item = new Item(itemId, img);
                        item.Href = "#";
                        string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                        item.Name        = headerTokens[0];
                        item.Description = headerTokens[1];

                        // Write pairwise alignment to file.
                        writePairwiseAlignment(Up, itemId);

                        // Set facet values for each facet category to default values
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet = new Facet();
                            switch (f.Name)
                            {
                            case ("InputOrder"):
                                facet = new Facet(f.Name, f.Type, seqPos);
                                break;

                            case ("QuerySequence"):
                                facet = new Facet(f.Name, f.Type, rec.ToString());
                                break;

                            default:
                                facet = CreateFacet(f.Name, f.Type, record, item, 0);
                                break;
                            }
                            item.Facets.Add(facet);
                        }

                        // Add item to collection, increment to next item, skip remaining code
                        collection.Items.Add(item);
                        itemId        += 1;
                        hitsProcessed += 1;
                    }
                }
            }
            return(itemId);
        }
Пример #16
0
        private void DoGenBank()
        {
            int progValue = 0;
            Dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Normal, new Action(delegate()
            {
                (CurrentControl as UserControl5).UserControl5Step1.Foreground = System.Windows.Media.Brushes.Black;
                progressBar1.Value = progValue;
            }));

            string inputDir = Up.ProjectDir + "\\xml";
            if (!Directory.Exists(inputDir))
            {
                throw new Exception("Directory " + inputDir + " does not exist.");
            }
            string[] blastXmlFiles = Directory.GetFiles(inputDir, "*.xml");
            int c = 1;
            Stack<GenBankItem> giList = new Stack<GenBankItem>();
            foreach (string blastFile in blastXmlFiles)
            {
                BlastXmlParser blastParser = new BlastXmlParser();
                progValue = Convert.ToInt32(Math.Round((double)c / blastXmlFiles.Count() * 100, 0));
                UpdateProgressBar(progValue, "Filtering results");
                try
                {
                    IList<BlastResult> blastResults = blastParser.Parse(blastFile);

                    List<GenBankItem> recordGiList = BlastUtil.filter(blastResults, Up.BlastMaxNumHits, Up.BlastMaxEvalue, Up.BlastMinPercentIdentity, Up.BlastMinPercentQueryCoverage);
                    foreach (GenBankItem gi in recordGiList)
                    {
                        giList.Push(gi);
                        Debug.WriteLine(gi.HitStart.ToString() + " " + gi.HitEnd.ToString());
                    }
                }
                catch
                {
                    FatalErrorDialog("Cannot parse " + blastFile);
                    Debug.WriteLine("Cannot parse " + blastFile);
                }
                c += 1;
            }
            progValue = 0;
            Dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Normal, new Action(delegate()
            {
                (CurrentControl as UserControl5).UserControl5Step2.Foreground = System.Windows.Media.Brushes.Black;
                progressBar1.Value = progValue;
            }));

            int totalGi = giList.Count();
            GenBankParser genkBankParser = new GenBankParser();
            int unParsableCount = 0;
            int notDownloadedCount = 0;
            string unParsableGIs = "";
            string notDownloadedGIs = "";
            bool isConnected = true;
            if (!IsConnectedToInternet())
            {
                isConnected = false;
                MessageBox.Show("Your internet connection appears to be down. As a result, missing GenBank records will not be downloaded.");
            }
            
            while (giList.Count > 0)
            {
                GenBankItem gitem = giList.Pop();
                progValue = Convert.ToInt32(Math.Round(((totalGi - giList.Count()) / (double)totalGi) * 100, 0));
                UpdateProgressBar(progValue, "Downloading GenBank records");
                string outFilename = Up.ProjectDir + "\\gb\\" + gitem.Id;
                outFilename += "_" + gitem.HitStart.ToString();
                outFilename += "_" + gitem.HitEnd.ToString();
                outFilename += ".gb";

                WebClient wc = new WebClient();

                if (File.Exists(outFilename))
                {
                    try
                    {
                        ISequence gpitem = genkBankParser.ParseOne(outFilename);
                    }
                    catch
                    {
                        if (isConnected)
                        {

                            string url = GetGenbankUrl(gitem);
                            try
                            {
                                wc.DownloadFile(url, outFilename);
                                Thread.Sleep(1000);
                            }
                            catch
                            {
                                wc.Proxy = null;
                                giList.Push(gitem);
                            }
                            try
                            {
                                ISequence gpitem = genkBankParser.ParseOne(outFilename);
                            }
                            catch
                            {
                                unParsableCount += 1;
                                unParsableGIs += gitem.Id + ",";
                            }
                        }
                        else
                        {
                            notDownloadedCount += 1;
                            notDownloadedGIs += gitem.Id + ",";
                        }
                    }

                }
                else
                {
                    if (isConnected)
                    {
                        string url = GetGenbankUrl(gitem);
                        try
                        {
                            wc.DownloadFile(url, outFilename);
                            Thread.Sleep(1000);
                        }
                        catch
                        {
                            wc.Proxy = null;
                            giList.Push(gitem);
                        }
                        try
                        {
                            ISequence gpitem = genkBankParser.ParseOne(outFilename);
                        }
                        catch
                        {
                            unParsableCount += 1;
                            unParsableGIs += gitem.Id + ",";
                        }
                    }
                    else
                    {
                        notDownloadedCount += 1;
                        notDownloadedGIs += gitem.Id + ",";
                    }
                }
            }

            if (notDownloadedCount > 0)
            {
                MessageBox.Show("Error downloading GenBank records: " + notDownloadedGIs + ".\r\nThis is likely caused by an interruption in the internet connection. Re-attempt the download by repeating this step.\r\n");
            }
            if (unParsableCount > 0)
            {
                MessageBox.Show("Error parsing GenBank records: " + unParsableGIs + ".\r\nThis is likely due to an unsupported field in the GenBank record. Contact the MBF development team at http://mbf.codeplex.com, and include one of the GI numbers in the bug report.\r\nYou can copy this message to the clipboard using Ctrl-C.\r\n");
            }
        }
Пример #17
0
        public void GenBankParserValidateParseOneWithSpecificFormats()
        {
            InitializeXmlVariables();
            // Initialization of xml strings.
            FilePath = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.FilePathNode);
            AlphabetName = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.AlphabetNameNode);
            MolType = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.MoleculeTypeNode);
            IsSequenceReadOnly = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.IsReadOnlyNode);
            SeqId = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.SequenceIdNode);
            StrandTopology = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.StrandTopologyNode);
            StrandType = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.StrandTypeNode);
            Div = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.DivisionNode);
            Version = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.VersionNode);
            SequenceDate = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.DateNode);
            PrimaryId = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.PrimaryIdNode);
            ExpectedSequence = _utilityObj._xmlUtil.GetTextValue(
                Constants.SimpleGenBankPrimaryNode,
                Constants.ExpectedSequenceNode);

            // parse
            BasicSequenceParser parserObj = new GenBankParser();

            parserObj.Alphabet = Alphabets.Protein;
            parserObj.Encoding = NcbiEAAEncoding.Instance;
            ISequence seq = parserObj.ParseOne(FilePath);

            Assert.AreEqual(Utility.GetAlphabet(AlphabetName),
                            seq.Alphabet);
            Assert.AreEqual(Utility.GetMoleculeType(MolType),
                            seq.MoleculeType);
            Assert.AreEqual(SeqId, seq.DisplayID);
            Assert.AreEqual(SeqId, seq.ID);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

            // test the metadata that is tricky to parse, and will not be tested implicitly by
            // testing the formatting
            GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];

            if (metadata.Locus.Strand != SequenceStrandType.None)
            {
                Assert.AreEqual(StrandType,
                                metadata.Locus.Strand.ToString());
            }
            Assert.AreEqual(StrandTopology.ToUpper(CultureInfo.CurrentCulture),
                            metadata.Locus.StrandTopology.ToString().ToUpper(
                                CultureInfo.CurrentCulture));
            Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
            Assert.AreEqual(DateTime.Parse(SequenceDate, null),
                            metadata.Locus.Date);
            Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
            Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

            // test the sequence string
            Assert.AreEqual(ExpectedSequence, seq.ToString());
            ApplicationLog.WriteLine(
                "GenBank Parser BVT: Successfully validated the Sequence");
            Console.WriteLine(string.Format((IFormatProvider)null,
                                            "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
                                            ExpectedSequence));
        }
Пример #18
0
        public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
        {
            // BLAST reports are saved in individual files by query and 
            // numbered in the same order as they appear in the input FASTA file.
            int[] annotatedIndex = new int[2];
            annotatedIndex[0] = -1;
            annotatedIndex[1] = -1;

            string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
            if (!File.Exists(blastFile))
            {
                throw new Exception("File does not exist.");
            }
            BlastXmlParser blastParser = new BlastXmlParser();
            IList<BlastResult> blastResults = blastParser.Parse(blastFile);
            GenBankParser gbParser = new GenBankParser();

            // iterate through the BLAST results.
            foreach (BlastResult blastResult in blastResults)
            {
                foreach (BlastSearchRecord record in blastResult.Records)
                {
                    int hitsProcessed = 0;
                    // If there are not hits in the BLAST result ...
                    int rank = 0;
                    if (record.Hits.Count() > 0)
                    {
                        // For each hit
                        for (int i = 0; i < record.Hits.Count(); i++)
                        {

                            Hit blastHit = record.Hits[i];
                            for (int j = 0; j < blastHit.Hsps.Count(); j++)
                            {

                                Hsp blastHsp = blastHit.Hsps[j];
                                double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                                double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;
                                
                                if ((percentId >= Up.BlastMinPercentIdentity) &&
                                    (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                                    (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                                    (hitsProcessed < Up.BlastMaxNumHits))
                                {
                                    rank += 1;
                                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                                    gbFile += "_" + gitem.HitStart.ToString();
                                    gbFile += "_" + gitem.HitEnd.ToString();
                                    gbFile += ".gb";
                                    
                                    try
                                    {
                                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                                        IList<FeatureItem> features = gbMeta.Features.All;
                                        FeatureItem bestItem = getBestFeatureItem(features);
                                        if (bestItem != null)
                                        {
                                            annotatedIndex[0] = i;
                                            annotatedIndex[1] = j;
                                            return annotatedIndex;
                                        }
                                    }
                                    
                                    catch
                                    {
                                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                                    }
                                    hitsProcessed += 1;
                                }
                            }
                        }
                    }
                }
            }
            
            return annotatedIndex;
        }