/// <summary>
/// Parses the single-record GenBank test file, verifies the parsed sequence and the
/// metadata that formatting alone would not exercise, then writes the sequence back
/// out with <c>GenBankFormatter</c> and compares the result with the expected text.
/// </summary>
public void TestGenBankWhenParsingOne()
{
    // parse
    ISequenceParser parser = new GenBankParser();
    ISequence seq = parser.Parse(_singleProteinSeqGenBankFilename).FirstOrDefault();

    // FirstOrDefault returns null when nothing was parsed; report that directly
    // rather than letting the next line fail with a NullReferenceException.
    Assert.IsNotNull(seq, "No sequence was parsed from the GenBank test file.");

    // test the non-metadata properties
    Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
    Assert.AreEqual("SCU49845", seq.ID);

    // test the metadata that is tricky to parse, and will not be tested implicitly by
    // testing the formatting
    GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
    Assert.AreEqual(SequenceStrandType.None, metadata.Locus.Strand);
    Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLowerInvariant());
    Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());

    // The literal uses an English month abbreviation, so parse it culture-independently.
    Assert.AreEqual(DateTime.Parse("21-JUN-1999", CultureInfo.InvariantCulture), metadata.Locus.Date);
    Assert.AreEqual("1", metadata.Version.Version);
    Assert.AreEqual("1293613", metadata.Version.GiNumber);

    // test that we're correctly putting all types of metadata in the right places
    Assert.AreEqual(1, seq.Metadata.Count);
    IList<CitationReference> referenceList = metadata.References;
    Assert.AreEqual(3, referenceList.Count);
    IList<FeatureItem> featureList = metadata.Features.All;
    Assert.AreEqual(6, featureList.Count);
    Assert.AreEqual(4, featureList[0].Qualifiers.Count);
    Assert.AreEqual(5, featureList[1].Qualifiers.Count);
    Assert.AreEqual(1, featureList[2].Qualifiers.Count);

    // test the sequence string
    string expected =
@"gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattg
gcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattt
taagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc";
    Assert.AreEqual(expected, new string(seq.Select(a => (char)a).ToArray()));

    // format
    ISequenceFormatter formatter = new GenBankFormatter();
    string actual;
    try
    {
        formatter.Format(seq, TempGenBankFileName);
        using (StreamReader reader = new StreamReader(TempGenBankFileName))
        {
            actual = reader.ReadToEnd();
        }
    }
    finally
    {
        // Clean up the temp file even when formatting or reading throws, so a
        // failed run does not leave stale output behind for the next one.
        if (File.Exists(TempGenBankFileName))
        {
            File.Delete(TempGenBankFileName);
        }
    }

    // test the formatting
    Assert.AreEqual(
        _singleProteinSeqGenBankFileExpectedOutput.Replace(" ", "").Replace("\r\n", Environment.NewLine),
        actual.Replace(" ", ""));
}
/// <summary>
/// Builds a genetic <see cref="Matter"/> for a GenBank record.
/// </summary>
/// <param name="metadata">
/// GenBank metadata; the matter name is composed from the extracted matter name
/// and the record's compound accession, separated by " | ".
/// </param>
/// <returns>
/// The new <see cref="Matter"/>, after <c>FillGroupAndSequenceType</c> has been applied to it.
/// </returns>
public Matter CreateMatterFromGenBankMetadata(GenBankMetadata metadata)
{
    var result = new Matter();
    result.Name = $"{ExtractMatterName(metadata)} | {metadata.Version.CompoundAccession}";
    result.Nature = Nature.Genetic;

    FillGroupAndSequenceType(result);

    return result;
}
/// <summary>
/// Stores one GenBank metadata record, then stores its locus, version and every
/// feature under the id returned for that record.
/// </summary>
/// <param name="meta">Parsed GenBank metadata to store.</param>
/// <param name="id">Id passed to the <c>Metadata</c> constructor together with <paramref name="meta"/>.</param>
private static void fill_metadata(GenBankMetadata meta, long id)
{
    // NOTE(review): execute_query presumably inserts the record and returns its
    // generated key - confirm against the Metadata class.
    long parentId = new Metadata(meta, id).execute_query(conn);

    fill_locus(meta.Locus, parentId);
    fill_version(meta.Version, parentId);

    foreach (var feature in meta.Features.All)
    {
        fill_feature(feature, parentId);
    }
}
/// <summary>
/// Parses the GenBank file at <c>FilePath</c> and validates the first sequence
/// (read-only flag, alphabet, molecule type, ids, locus/version metadata and the
/// sequence text) against the values loaded by <c>InitializeXmlVariables</c>.
/// </summary>
public void GenBankParserValidateParseFileName()
{
    InitializeXmlVariables();

    // Parse the file and pick the first record.
    ISequenceParser genBankParser = new GenBankParser();
    IList<ISequence> parsedSequences = genBankParser.Parse(FilePath);
    ISequence first = parsedSequences[0];

    // Non-metadata properties. The read-only check only applies when the
    // configuration says "true" (case-sensitive, current culture).
    bool readOnlyExpected =
        string.Compare(IsSequenceReadOnly, "true", false, CultureInfo.CurrentCulture) == 0;
    if (readOnlyExpected)
    {
        Assert.IsTrue(first.IsReadOnly);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the ReadOnly Property");
    }

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), first.Alphabet);
    Assert.AreEqual(Utility.GetMoleculeType(MolType), first.MoleculeType);
    Assert.AreEqual(SeqId, first.DisplayID);
    Assert.AreEqual(SeqId, first.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // Metadata that is tricky to parse and is not covered implicitly by the
    // formatting tests.
    GenBankMetadata genBankData = (GenBankMetadata)first.Metadata["GenBank"];

    // A strand of None is not compared against the configured strand type.
    if (SequenceStrandType.None != genBankData.Locus.Strand)
    {
        Assert.AreEqual(StrandType, genBankData.Locus.Strand.ToString());
    }

    string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
    string actualTopology =
        genBankData.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedTopology, actualTopology);
    Assert.AreEqual(Div, genBankData.Locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankData.Locus.Date);
    Assert.AreEqual(Version, genBankData.Version.Version.ToString((IFormatProvider)null));
    Assert.AreEqual(PrimaryId, genBankData.Version.GINumber);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // Sequence text.
    Assert.AreEqual(ExpectedSequence, first.ToString());
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Sequence");

    string consoleMessage = string.Format(
        (IFormatProvider)null,
        "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
        ExpectedSequence);
    Console.WriteLine(consoleMessage);
}
/// <summary>
/// Parses NC_001284.gbk, checks its feature counts, round-trips the metadata
/// through <c>BinaryFormatter</c> and checks that the deserialized copy reports
/// the same features.
/// </summary>
/// <remarks>
/// Exceptions and assertion failures are allowed to propagate so the test
/// runner reports the real cause; the previous blanket catch replaced every
/// failure with a message-less <c>Assert.Fail()</c>.
/// </remarks>
public void TestGenBankFeaturesWithBinaryFormatter()
{
    ISequenceParser parser = new GenBankParser();
    ISequence seq = parser.ParseOne(@"testdata\GenBank\NC_001284.gbk");
    GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;
    Assert.IsNotNull(metadata, "Parsed sequence carries no GenBank metadata.");

    AssertNc001284Features(metadata, metadata);

    GenBankMetadata deserializedMetadata;
    using (Stream stream = File.Open("GenbankMetadata.data", FileMode.Create))
    {
        // NOTE: BinaryFormatter is unsafe for untrusted input; here it only
        // round-trips an object created by this test.
        BinaryFormatter formatter = new BinaryFormatter();
        formatter.Serialize(stream, metadata);
        stream.Seek(0, SeekOrigin.Begin);
        deserializedMetadata = (GenBankMetadata)formatter.Deserialize(stream);
    }

    Assert.AreNotSame(metadata, deserializedMetadata);
    AssertNc001284Features(deserializedMetadata, metadata);
}

/// <summary>
/// Asserts the feature counts expected for NC_001284.gbk on <paramref name="candidate"/>,
/// and that its first coding sequence's translation qualifier matches the translation
/// computed from <paramref name="translationSource"/>.
/// </summary>
/// <param name="candidate">Metadata whose features are checked.</param>
/// <param name="translationSource">Metadata whose first coding sequence is translated for comparison.</param>
private static void AssertNc001284Features(GenBankMetadata candidate, GenBankMetadata translationSource)
{
    Assert.AreEqual(743, candidate.Features.All.Count);
    Assert.AreEqual(117, candidate.Features.CodingSequences.Count);
    Assert.AreEqual(32, candidate.Features.Exons.Count);
    Assert.AreEqual(22, candidate.Features.Introns.Count);
    Assert.AreEqual(60, candidate.Features.Genes.Count);
    Assert.AreEqual(455, candidate.Features.MiscFeatures.Count);
    Assert.AreEqual(17, candidate.Features.Promoters.Count);
    Assert.AreEqual(21, candidate.Features.TransferRNAs.Count);
    Assert.AreEqual(
        117,
        candidate.Features.All.FindAll(f => f.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);

    // The translation qualifier is stored with surrounding quotes; strip them
    // before comparing with the computed translation.
    Assert.AreEqual(
        candidate.Features.CodingSequences[0].Translation.Trim('"'),
        translationSource.Features.CodingSequences[0].GetTranslation().ToString());
    Assert.AreEqual(2, candidate.GetFeatures(11918, 12241).Count);
}
/// <summary>
/// Parses the GenBank file at <c>FilePath</c> through a <c>StreamReader</c> and
/// validates the first sequence (alphabet, id, locus/version metadata and the
/// sequence text) against the values loaded by <c>InitializeXmlVariables</c>.
/// </summary>
public void GenBankParserValidateParseFileNameWithStream()
{
    InitializeXmlVariables();

    using (ISequenceParser streamParser = new GenBankParser())
    {
        // Materialise the sequences while the reader is still open.
        List<ISequence> parsed;
        using (StreamReader input = new StreamReader(FilePath))
        {
            IEnumerable<ISequence> lazySequences = streamParser.Parse(input);
            parsed = lazySequences.ToList();
        }

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), parsed[0].Alphabet);
        Assert.AreEqual(SeqId, parsed[0].ID);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // Metadata that is tricky to parse and is not covered implicitly by the
        // formatting tests.
        GenBankMetadata genBankData = (GenBankMetadata)parsed[0].Metadata["GenBank"];

        // A strand of None is not compared against the configured strand type.
        if (SequenceStrandType.None != genBankData.Locus.Strand)
        {
            Assert.AreEqual(StrandType, genBankData.Locus.Strand.ToString());
        }

        string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
        string actualTopology =
            genBankData.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
        Assert.AreEqual(expectedTopology, actualTopology);
        Assert.AreEqual(Div, genBankData.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), genBankData.Locus.Date);
        Assert.AreEqual(Version, genBankData.Version.Version.ToString((IFormatProvider)null));
        Assert.AreEqual(PrimaryId, genBankData.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // Sequence text: every item of the sequence is cast to a char.
        char[] symbols = parsed[0].Select(item => (char)item).ToArray();
        Assert.AreEqual(ExpectedSequence, new string(symbols));
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Sequence");

        string consoleMessage = string.Format(
            (IFormatProvider)null,
            "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
            ExpectedSequence);
        Console.WriteLine(consoleMessage);
    }
}
/// <summary>
/// Parses the single-record GenBank test file with <c>ParseOne</c>, verifies the
/// parsed sequence and the metadata that formatting alone would not exercise, then
/// formats the sequence to a string and compares it with the expected text.
/// </summary>
public void TestGenBankWhenParsingOne()
{
    // parse
    ISequenceParser parser = new GenBankParser();
    ISequence seq = parser.ParseOne(_singleProteinSeqGenBankFilename);

    // test the non-metadata properties
    Assert.IsTrue(seq.IsReadOnly);
    Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
    Assert.AreEqual(MoleculeType.DNA, seq.MoleculeType);
    Assert.AreEqual("SCU49845", seq.DisplayID);
    Assert.AreEqual("SCU49845", seq.ID);

    // test the metadata that is tricky to parse, and will not be tested implicitly by
    // testing the formatting
    GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
    Assert.AreEqual(SequenceStrandType.None, metadata.Locus.Strand);

    // Lower-casing and date parsing are done culture-independently so the test
    // gives the same result whatever the machine's regional settings are.
    Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLowerInvariant());
    Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());
    Assert.AreEqual(
        DateTime.Parse("21-JUN-1999", System.Globalization.CultureInfo.InvariantCulture),
        metadata.Locus.Date);
    Assert.AreEqual("1", metadata.Version.Version);
    Assert.AreEqual("1293613", metadata.Version.GINumber);

    // test that we're correctly putting all types of metadata in the right places
    Assert.AreEqual(1, seq.Metadata.Count);
    IList<CitationReference> referenceList = metadata.References;
    Assert.AreEqual(3, referenceList.Count);
    IList<FeatureItem> featureList = metadata.Features.All;
    Assert.AreEqual(6, featureList.Count);
    Assert.AreEqual(4, featureList[0].Qualifiers.Count);
    Assert.AreEqual(5, featureList[1].Qualifiers.Count);
    Assert.AreEqual(1, featureList[2].Qualifiers.Count);

    // test the sequence string
    string expected =
@"GATCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCCGACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCTGCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGAACCGCCAATAGACAACATATGTAACATATTTAGGATATACCTCGAAAATAATAAACCGCCACACTGTCATTATTATAATTAGAAACAGAACGCAAAAATTATCCACTATATAATTCAAAGACGCGAAAAAAAAAGAACAACGCGTCATAGAACTTTTGGCAATTCGCGTCACAAATAAATTTTGGCAACTTATGTTTCCTCTTCGAGCAGTACTCGAGCCCTGTCTCAAGAATGTAATAATACCCATCGTAGGTATGGTTAAAGATAGCATCTCCACAACCTCAAAGCTCCTTGCCGAGAGTCGCCCTCCTTTGTCGAGTAATTTTCACTTTTCATATGAGAACTTATTTTCTTATTCTTTACTCTCACATCCTGTAGTGATTGACACTGCAACAGCCACCATCACTAGAAGAACAGAACAATTACTTAATAGAAAAATTATATCTTCCTCGAAACGATTTCCTGCTTCCAACATCTACGTATATCAAGAAGCATTCACTTACCATGACACAGCTTCAGATTTCATTATTGCTGACAGCTACTATATCACTACTCCATCTAGTAGTGGCCACGCCCTATGAGGCATATCCTATCGGAAAACAATACCCCCCAGTGGCAAGAGTCAATGAATCGTTTACATTTCAAATTTCCAATGATACCTATAAATCGTCTGTAGACAAGACAGCTCAAATAACATACAATTGCTTCGACTTACCGAGCTGGCTTTCGTTTGACTCTAGTTCTAGAACGTTCTCAGGTGAACCTTCTTCTGACTTACTATCTGATGCGAACACCACGTTGTATTTCAATGTAATACTCGAGGGTACGGACTCTGCCGACAGCACGTCTTTGAACAATACATACCAATTTGTTGTTACAAACCGTCCATCCATCTCGCTATCGTCAGATTTCAATCTATTGGCGTTGTTAAAAAACTATGGTTATACTAACGGCAAAAACGCTCTGAAACTAGATCCTAATGAAGTCTTCAACGTGACTTTTGACCGTTCAATGTTCACTAACGAAGAATCCATTGTGTCGTATTACGGACGTTCTCAGTTGTATAATGCGCCGTTACCCAATTGGCTGTTCTTCGATTCTGGCGAGTTGAAGTTTACTGGGACGGCACCGGTGATAAACTCGGCGATTGCTCCAGAAACAAGCTACAGTTTTGTCATCATCGCTACAGACATTGAAGGATTTTCTGCCGTTGAGGTAGAATTCGAATTAGTCATCGGGGCTCACCAGTTAACTACCTCTATTCAAAATAGTTTGATAATCAACGTTACTGACACAGGTAACGTTTCATATGACTTACCTCTAAACTATGTTTATCTCGATGACGATCCTATTTCTTCTGATAAATTGGGTTCTATAAACTTATTGGATGCTCCAGACTGGGTGGCATTAGATAATGCTACCATTTCCGGGTCTGTCCCAGATGAATTACTCGGTAAGAACTCCAATCCTGCCAATTTTTCTGTGTCCATTTATGATACTTATGGTGATGTGATTTATTTCAACTTCGAAGTTGTCTCCACAACGGATTTGTTTGCCATTAGTTCTCTTCCCAATATTAACGCTACAAGGGGTGAATGGTTCTCCTACTATTTTTTGCCTTCTCAGTTTACAGACTACGTGAATACAAACGTTTCATTAGAGTTTACTAATTCAAGCCAAGACCATGACTGGGTGAAATTCCAATCATCTAATTTAACATTAGCTGGAGAAGTGCCCAAGAATTTCGACAAGCTTTCATTAGGTTTGAAAGCGAACCAAGGTTCACAATCTCAAGAGCTATATTTTAACATCATTG
GCATGGATTCAAAGATAACTCACTCAAACCACAGTGCGAATGCAACGTCCACAAGAAGTTCTCACCACTCCACCTCAACAAGTTCTTACACATCTTCTACTTACACTGCAAAAATTTCTTCTACCTCCGCTGCTGCTACTTCTTCTGCTCCAGCAGCGCTGCCAGCAGCCAATAAAACTTCATCTCACAATAAAAAAGCAGTAGCAATTGCGTGCGGTGTTGCTATCCCATTAGGCGTTATCCTAGTAGCTCTCATTTGCTTCCTAATATTCTGGAGACGCAGAAGGGAAAATCCAGACGATGAAAACTTACCGCATGCTATTAGTGGACCTGATTTGAATAATCCTGCAAATAAACCAAATCAAGAAAACGCTACACCTTTGAACAACCCCTTTGATGATGATGCTTCCTCGTACGATGATACTTCAATAGCAAGAAGATTGGCTGCTTTGAACACTTTGAAATTGGATAACCACTCTGCCACTGAATCTGATATTTCCAGCGTGGATGAAAAGAGAGATTCTCTATCAGGTATGAATACATACAATGATCAGTTCCAATCCCAAAGTAAAGAAGAATTATTAGCAAAACCCCCAGTACAGCCTCCAGAGAGCCCGTTCTTTGACCCACAGAATAGGTCTTCTTCTGTGTATATGGATAGTGAACCAGCAGTAAATAAATCCTGGCGATATACTGGCAACCTGTCACCAGTCTCTGATATTGTCAGAGACAGTTACGGATCACAAAAAACTGTTGATACAGAAAAACTTTTCGATTTAGAAGCACCAGAGAAGGAAAAACGTACGTCAAGGGATGTCACTATGTCTTCACTGGACCCTTGGAACAGCAATATTAGCCCTTCTCCCGTAAGAAAATCAGTAACACCATCACCATATAACGTAACGAAGCATCGTAACCGCCACTTACAAAATATTCAAGACTCTCAAAGCGGTAAAAACGGAATCACTCCCACAACAATGTCAACTTCATCTTCTGACGATTTTGTTCCGGTTAAAGATGGTGAAAATTTTTGCTGGGTCCATAGCATGGAACCAGACAGAAGACCAAGTAAGAAAAGGTTAGTAGATTTTTCAAATAAGAGTAATGTCAATGTTGGTCAAGTTAAGGACATTCACGGACGCATCCCAGAAATGCTGTGATTATACGCAACGATATTTTGCTTAATTTTATTTTCCTGTTTTATTTTTTATTAGTGGTTTACAGATACCCTATATTTTATTTAGTTTTTATACTTAGAGACATTTAATTTTAATTCCATTCTTCAAATTTCATTTTTGCACTTAAAACAAAGATCCAAAAATGCTCTCGCCCTCTTCATATTGAGAATACACTCCATTCAAAATTTTGTCGTCACCGCTGATTAATTTTTCACTAAACTGATGAATAATCAAAGGCCCCACGTCAGAACCGACTAAAGAAGTGAGTTTTATTTTAGGAGGTTGAAAACCATTATTGTCTGGTAAATTTTCATCTTCTTGACATTTAACCCAGTTTGAATCCCTTTCAATTTCTGCTTTTTCCTCCAAACTATCGACCCTCCTGTTTCTGTCCAACTTATGTCCTAGTTCCAATTCGATCGCATTAATAACTGCTTCAAATGTTATTGTGTCATCGTTGACTTTAGGTAATTTCTCCAAATGCATAATCAAACTATTTAAGGAAGATCGGAATTCGTCGAACACTTCAGTTTCCGTAATGATCTGATCGTCTTTATCCACATGTTGTAATTCACTAAAATCTAAAACGTATTTTTCAATGCATAAATCGTTCTTTTTATTAATAATGCAGATGGAAAATCTGTAAACGTGCGTTAATTTAGAAAGAACATCCAGTATAAGTTCTTCTATATAGTCAATTAAAGCAGGATGCCTATTAATGGGAACGAACTGCGGCAAGTTGAATGACTGGTAAGTAGTGTAGTCGAATGACTGAGGTGGGTATACATTTCTATAAAATAAAATCAAATTAATGTAGCATTT
TAAGTATACCCTCAGCCACTTCTCTACCCATCTATTCATAAAGCTGACGCAACGATTACTATTTTTTTTTTCTTCTTGGATCTCAGTCGTCGCAAAAACGTATACCTTCTTTTTCCGACCTTTTTTTTAGCTTTCTGGAAAAGTTTATATTAGTTAAACAGGGTCTAGTCTTAGTGTGAAAGCTAGTGGTTTCGATTGACTGATATTAAGAAAGTGGAAATTAAATTAGTAGTGTAGACGTATATGCATATGTATTTCTCGCCTGTTTATGTTTCTACGTACTTTTGATTTATAGCAAGGGGAAAAGAAATACATACTATTTTTTGGTAAAGGTGAAAGCATAATGTAAAAGCTAGAATAAAATGGACGAAATAAAGAGAGGCTTAGTTCATCTTTTTTCCAAAAAGCACCCAATGATAATAACTAAAATGAAAAGGATTTGCCATCTGTCAGCAACATCAGTTGTGTGAGCAATAATAAAATCATCACCTCCGTTGCCTTTAGCGCGTTTGTCGTTTGTATCTTCCGTAATTTTAGTCTTATCAATGGGAATCATAAATTTTCCAATGAATTAGCAATTTCGTCCAATTCTTTTTGAGCTTCTTCATATTTGCTTTGGAATTCTTCGCACTTCTTTTCCCATTCATCTCTTTCTTCTTCCAAAGCAACGATCCTTCTACCCATTTGCTCAGAGTTCAAATCGGCCTCTTTCAGTTTATCCATTGCTTCCTTCAGTTTGGCTTCACTGTCTTCTAGCTGTTGTTCTAGATCCTGGTTTTTCTTGGTGTAGTTCTCATTATTAGATCTCAAGTTATTGGAGTCTTCAGCCAATTGCTTTGTATCAGACAATTGACTCTCTAACTTCTCCACTTCACTGTCGAGTTGCTCGTTTTTAGCGGACAAAGATTTAATCTCGTTTTCTTTTTCAGTGTTAGATTGCTCTAATTCTTTGAGCTGTTCTCTCAGCTCCTCATATTTTTCTTGCCATGACTCAGATTCTAATTTTAAGCTATTCAATTTCTCTTTGATC";
    Assert.AreEqual(expected, seq.ToString());

    // format
    ISequenceFormatter formatter = new GenBankFormatter();
    string actual = formatter.FormatString(seq);

    // test the formatting
    Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
}
/// <summary>
/// Checks the typed feature accessors of <c>GenBankMetadata</c> on two GenBank
/// files: the single-protein test file and NC_001284.gbk.
/// </summary>
/// <remarks>
/// Both parsers are closed and disposed in <c>finally</c> blocks so they are
/// released even when an assertion fails.
/// </remarks>
public void GenBankFeatures()
{
    // parse the single-protein file
    ISequenceParser parser = new GenBankParser(_singleProteinSeqGenBankFilename);
    try
    {
        ISequence seq = parser.Parse().FirstOrDefault();
        Assert.IsNotNull(seq, "No sequence was parsed from the single-protein GenBank file.");

        GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;
        Assert.IsNotNull(metadata, "Parsed sequence carries no GenBank metadata.");

        List<CodingSequence> CDS = metadata.Features.CodingSequences;
        Assert.AreEqual(3, CDS.Count);
        Assert.AreEqual(1, CDS[0].DatabaseCrossReference.Count);
        Assert.AreEqual(string.Empty, CDS[0].GeneSymbol);
        Assert.AreEqual(1, metadata.Features.GetFeatures("source").Count);
        Assert.IsFalse(CDS[0].Pseudo);
        Assert.AreEqual(2, metadata.GetFeatures(1, 109).Count);
        Assert.AreEqual(2, metadata.GetFeatures(1, 10).Count);
        Assert.AreEqual(2, metadata.GetFeatures(10, 100).Count);
        Assert.AreEqual(2, metadata.GetFeatures(120, 150).Count);
        Assert.AreEqual(0, metadata.GetCitationsReferredInFeatures().Count);
    }
    finally
    {
        parser.Close();
        parser.Dispose();
    }

    // parse the larger NC_001284 record
    ISequenceParser parser1 = new GenBankParser(_genBankDataPath + @"\NC_001284.gbk");
    try
    {
        ISequence seq1 = parser1.Parse().FirstOrDefault();
        Assert.IsNotNull(seq1, "No sequence was parsed from NC_001284.gbk.");

        GenBankMetadata metadata1 = seq1.Metadata["GenBank"] as GenBankMetadata;
        Assert.IsNotNull(metadata1, "Parsed sequence carries no GenBank metadata.");

        Assert.AreEqual(743, metadata1.Features.All.Count);
        Assert.AreEqual(117, metadata1.Features.CodingSequences.Count);
        Assert.AreEqual(32, metadata1.Features.Exons.Count);
        Assert.AreEqual(22, metadata1.Features.Introns.Count);
        Assert.AreEqual(60, metadata1.Features.Genes.Count);
        Assert.AreEqual(455, metadata1.Features.MiscFeatures.Count);
        Assert.AreEqual(17, metadata1.Features.Promoters.Count);
        Assert.AreEqual(21, metadata1.Features.TransferRNAs.Count);
        Assert.AreEqual(
            117,
            metadata1.Features.All.FindAll(f => f.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);
        Assert.AreEqual(117, metadata1.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count);

        // Copy the translated sequence item by item into a byte buffer and
        // decode it as ASCII to get a comparable string.
        ISequence seqTemp = metadata1.Features.CodingSequences[0].GetTranslation();
        byte[] tempData = new byte[seqTemp.Count];
        for (int i = 0; i < seqTemp.Count; i++)
        {
            tempData[i] = seqTemp[i];
        }

        string sequenceInString = ASCIIEncoding.ASCII.GetString(tempData);

        // The translation qualifier is stored with surrounding quotes.
        Assert.AreEqual(
            metadata1.Features.CodingSequences[0].Translation.Trim('"'),
            sequenceInString.Trim('"'));
        Assert.AreEqual(2, metadata1.GetFeatures(11918, 12241).Count);
    }
    finally
    {
        parser1.Close();
        parser1.Dispose();
    }
}
/// <summary>
/// Validates that deserialized GenBank metadata matches the metadata it was
/// serialized from, field by field (locus, origin, project, references,
/// segment, source and version).
/// </summary>
/// <param name="deserializedMetadta">Deserialized metadata (the value under test).</param>
/// <param name="metadata">Original metadata (the expected values).</param>
private static void ValidateDeserializedMetadata(GenBankMetadata deserializedMetadta,
    GenBankMetadata metadata)
{
    Assert.AreEqual(metadata.Keywords, deserializedMetadta.Keywords);

    Assert.AreEqual(metadata.Locus.Date, deserializedMetadta.Locus.Date);
    Assert.AreEqual(metadata.Locus.DivisionCode, deserializedMetadta.Locus.DivisionCode);
    Assert.AreEqual(metadata.Locus.MoleculeType, deserializedMetadta.Locus.MoleculeType);
    Assert.AreEqual(metadata.Locus.Name, deserializedMetadta.Locus.Name);
    Assert.AreEqual(metadata.Locus.SequenceLength, deserializedMetadta.Locus.SequenceLength);
    Assert.AreEqual(metadata.Locus.SequenceType, deserializedMetadta.Locus.SequenceType);
    Assert.AreEqual(metadata.Locus.Strand, deserializedMetadta.Locus.Strand);
    Assert.AreEqual(metadata.Locus.StrandTopology, deserializedMetadta.Locus.StrandTopology);

    // Previously Origin was compared with itself, which could never fail.
    Assert.AreEqual(metadata.Origin, deserializedMetadta.Origin);
    Assert.AreEqual(metadata.Primary, deserializedMetadta.Primary);

    Assert.AreEqual(metadata.Project.Name, deserializedMetadta.Project.Name);

    // Compare the counts first: the loops below are bounded by the deserialized
    // collections, so items lost during deserialization would otherwise go unnoticed.
    Assert.AreEqual(metadata.Project.Numbers.Count, deserializedMetadta.Project.Numbers.Count);
    for (int i = 0; i < deserializedMetadta.Project.Numbers.Count; i++)
    {
        Assert.AreEqual(metadata.Project.Numbers[i], deserializedMetadta.Project.Numbers[i]);
    }

    Assert.AreEqual(metadata.References.Count, deserializedMetadta.References.Count);
    for (int i = 0; i < deserializedMetadta.References.Count; i++)
    {
        Assert.AreEqual(metadata.References[i].Authors, deserializedMetadta.References[i].Authors);
        Assert.AreEqual(metadata.References[i].Consortiums, deserializedMetadta.References[i].Consortiums);
        Assert.AreEqual(metadata.References[i].Journal, deserializedMetadta.References[i].Journal);
        Assert.AreEqual(metadata.References[i].Location, deserializedMetadta.References[i].Location);
        Assert.AreEqual(metadata.References[i].Medline, deserializedMetadta.References[i].Medline);
        Assert.AreEqual(metadata.References[i].Number, deserializedMetadta.References[i].Number);
        Assert.AreEqual(metadata.References[i].PubMed, deserializedMetadta.References[i].PubMed);
        Assert.AreEqual(metadata.References[i].Remarks, deserializedMetadta.References[i].Remarks);
        Assert.AreEqual(metadata.References[i].Title, deserializedMetadta.References[i].Title);
    }

    Assert.AreEqual(metadata.Segment.Current, deserializedMetadta.Segment.Current);
    Assert.AreEqual(metadata.Segment.Count, deserializedMetadta.Segment.Count);

    Assert.AreEqual(metadata.Source.CommonName, deserializedMetadta.Source.CommonName);
    Assert.AreEqual(metadata.Source.Organism.ClassLevels, deserializedMetadta.Source.Organism.ClassLevels);
    Assert.AreEqual(metadata.Source.Organism.Genus, deserializedMetadta.Source.Organism.Genus);
    Assert.AreEqual(metadata.Source.Organism.Species, deserializedMetadta.Source.Organism.Species);

    Assert.AreEqual(metadata.Version.Accession, deserializedMetadta.Version.Accession);
    Assert.AreEqual(metadata.Version.CompoundAccession, deserializedMetadta.Version.CompoundAccession);
    Assert.AreEqual(metadata.Version.GINumber, deserializedMetadta.Version.GINumber);
    Assert.AreEqual(metadata.Version.Version, deserializedMetadta.Version.Version);
}
/// <summary>
/// Opens a GenBank file as the current project and rebuilds <c>FragmentDict</c>
/// with one <c>Fragment</c> per misc feature of the first record, keyed by the
/// feature's standard name.
/// </summary>
/// <param name="file">Path of the GenBank file to open.</param>
/// <exception cref="InvalidOperationException">
/// The file yields no sequence, or the first sequence has no GenBank metadata.
/// </exception>
public void openProject(String file)
{
    Sequence sequence;
    parser = SequenceParsers.GenBank;
    parser.Open(file);
    try
    {
        sequence = (Sequence)parser.Parse().FirstOrDefault();
    }
    finally
    {
        // Release the file even when parsing throws.
        parser.Close();
    }

    if (sequence == null)
    {
        throw new InvalidOperationException("No sequence could be parsed from '" + file + "'.");
    }

    Fragment project = new Fragment(file, "project", sequence);
    GenBankMetadata meta = sequence.Metadata["GenBank"] as GenBankMetadata;
    if (meta == null)
    {
        throw new InvalidOperationException("The sequence parsed from '" + file + "' has no GenBank metadata.");
    }

    FragmentDict = new Dictionary<string, Fragment>();

    // The project text does not change inside the loop, so fetch it once.
    String projectText = project.GetString();
    foreach (var feat in meta.Features.MiscFeatures)
    {
        // Feature locations are 1-based and inclusive; Substring takes a
        // 0-based start and a length.
        int start = feat.Location.LocationStart - 1;
        int length = feat.Location.LocationEnd - feat.Location.LocationStart + 1;
        String subseq = projectText.Substring(start, length);

        FragmentDict.Add(
            feat.StandardName,
            new Fragment(file, feat.StandardName, new Sequence(Alphabets.DNA, subseq)));
    }
}
/// <summary>
/// Derives a display name for a sequence from its GenBank metadata by reconciling
/// the species, the common name and the (suffix-trimmed) definition.
/// </summary>
/// <param name="metadata">
/// GenBank metadata providing the organism species, the source common name and the definition.
/// </param>
/// <returns>
/// The definition, the common name or species, or the two joined with " | ",
/// depending on which of them contains the other.
/// </returns>
/// <exception cref="Exception">
/// Thrown when neither the common name nor the species covers the other.
/// </exception>
private static string ExtractMatterName(GenBankMetadata metadata)
{
    string species = metadata.Source.Organism.Species.GetLargestRepeatingSubstring();
    string commonName = metadata.Source.CommonName;

    // Strip the known "completeness" suffixes one after another, in this order.
    string[] suffixes =
    {
        ", complete genome.",
        ", complete sequence.",
        ", complete CDS.",
        ", complete cds.",
        ", genome."
    };
    string definition = metadata.Definition;
    foreach (string suffix in suffixes)
    {
        definition = definition.TrimEnd(suffix);
    }

    // Pick the organism name that covers the other one; the common name wins
    // when both tests succeed.
    string organismName;
    if (commonName.Contains(species) || species.IsSubsetOf(commonName))
    {
        organismName = commonName;
    }
    else if (species.Contains(commonName) || commonName.IsSubsetOf(species))
    {
        organismName = species;
    }
    else
    {
        throw new Exception($"Sequences names are not equal. CommonName = {commonName}, Species = {species}, Definition = {definition}");
    }

    // Prefer whichever of definition / organism name contains the other;
    // otherwise show both.
    if (definition.Contains(organismName) || organismName.IsSubsetOf(definition))
    {
        return definition;
    }

    if (organismName.Contains(definition) || definition.IsSubsetOf(organismName))
    {
        return organismName;
    }

    return $"{organismName} | {definition}";
}
/// <summary>
/// Scans the BLAST report of one query and returns the position of the first
/// hit/HSP that passes the configured thresholds and whose GenBank record yields
/// a feature from <c>getBestFeatureItem</c>.
/// </summary>
/// <param name="Up">Project settings: project directory and the BLAST filter thresholds.</param>
/// <param name="seqPos">Number of the query; selects <c>xml\&lt;seqPos&gt;.xml</c> under the project directory.</param>
/// <returns>
/// A two-element array holding the hit index and the HSP index, or <c>{ -1, -1 }</c>
/// when no hit qualified.
/// </returns>
/// <exception cref="FileNotFoundException">The BLAST XML report for the query does not exist.</exception>
public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    int[] annotatedIndex = { -1, -1 };

    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        // Same message as before, now with a specific type that carries the path.
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();

    // iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            // Counts the qualifying HSPs examined for this record; capped by BlastMaxNumHits.
            int hitsProcessed = 0;

            // For each hit (the loop body simply does not run when there are none)
            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];
                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    bool qualifies = (percentId >= Up.BlastMinPercentIdentity)
                        && (Up.BlastMaxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= Up.BlastMinPercentQueryCoverage)
                        && (hitsProcessed < Up.BlastMaxNumHits);
                    if (!qualifies)
                    {
                        continue;
                    }

                    // The second '|'-separated field of the hit id is read as the GI number.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);

                    // GenBank file name: <project>\gb\<id>_<hitStart>_<hitEnd>.gb
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    try
                    {
                        // NOTE(review): the log lines print the id of the first hit
                        // (Hits[0]) rather than the current one - confirm this is intended.
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);
                        if (bestItem != null)
                        {
                            annotatedIndex[0] = i;
                            annotatedIndex[1] = j;
                            return annotatedIndex;
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: an unreadable GenBank record must not abort the
                        // scan, but keep the reason in the log instead of dropping it.
                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString() + " (" + ex.Message + ")");
                    }

                    hitsProcessed += 1;
                }
            }
        }
    }

    return annotatedIndex;
}
/// <summary>
/// Exercises FeatureItem.GetSubSequence (on a parsed GenBank file and on a hand-built
/// join/complement location with a referenced sequence) and FeatureItem.GetSubFeatures.
/// </summary>
public void TestFeatureItem()
{
    string _genBankDataPath = @"TestUtils\GenBank";
    GenBankParser gbParser = new GenBankParser();
    ISequence parsed = gbParser.ParseOne(_genBankDataPath + @"\BK000016-tpa.gbk");
    GenBankMetadata gbMeta = parsed.Metadata["GenBank"] as GenBankMetadata;

    // GetSubSequence: the first two features must match the equivalent Range() call.
    for (int featureIndex = 0; featureIndex < 2; featureIndex++)
    {
        ISequence extracted = gbMeta.Features.All[featureIndex].GetSubSequence(parsed);
        int offset = gbMeta.Features.All[featureIndex].Location.Start - 1;
        int length = gbMeta.Features.All[featureIndex].Location.End - offset;
        Assert.AreEqual(extracted.ToString(), parsed.Range(offset, length).ToString());
    }

    // GetSubSequence: join of a complemented local span and a span on a referenced sequence.
    ISequence target = new Sequence(Alphabets.DNA, "ACGTAAAGGT");
    Sequence referenced = new Sequence(Alphabets.DNA, "AAAAATTTT");
    LocationBuilder builder = new LocationBuilder();
    ILocation joined = builder.GetLocation("join(complement(4..8),Ref1:5..7)");
    Assert.AreEqual("join(complement(4..8),Ref1:5..7)", builder.GetLocationString(joined));

    FeatureItem joinedFeature = new FeatureItem("Feature1", joined);
    Dictionary<string, ISequence> referenceLookup = new Dictionary<string, ISequence>();
    referenceLookup.Add("Ref1", referenced);
    ISequence joinedResult = joinedFeature.GetSubSequence(target, referenceLookup);
    Assert.AreEqual("ATTTCATT", joinedResult.ToString());

    // GetSubFeatures: each feature paired with the number of sub-features expected for it.
    FeatureItem[] candidates =
    {
        new FeatureItem("Source", "1..1509"),
        new FeatureItem("mRNA", "join(10..567,789..1320)"),
        new FeatureItem("CDS", "join(54..567,789..1254)"),
        new FeatureItem("Exon", "10..567"),
        new FeatureItem("Intron", "568..788"),
        new FeatureItem("Exon", "789..1320")
    };
    int[] expectedSubFeatureCounts = { 5, 4, 1, 0, 0, 0 };

    SequenceFeatures container = new SequenceFeatures();
    foreach (FeatureItem candidate in candidates)
    {
        container.All.Add(candidate);
    }

    for (int k = 0; k < candidates.Length; k++)
    {
        List<FeatureItem> children = candidates[k].GetSubFeatures(container);
        Assert.AreEqual(expectedSubFeatureCounts[k], children.Count);
    }
}
/// <summary>
/// Populates every section of a GenBankMetadata instance, clones it, and verifies that
/// the clone carries the same values as the original.
/// </summary>
/// <remarks>
/// Collection sizes are asserted before element-wise comparison so that a clone with
/// missing entries cannot pass vacuously.
/// </remarks>
public void GenBankMetadataClone()
{
    LocationBuilder locBuilder = new LocationBuilder();
    GenBankMetadata metadata = new GenBankMetadata();

    metadata.Accession = new GenBankAccession();
    metadata.Accession.Primary = "PAccession";
    metadata.Accession.Secondary.Add("SAccession1");
    metadata.Accession.Secondary.Add("SAccession2");
    metadata.BaseCount = "a 1 c 2";
    metadata.Comments.Add("Comment1");
    metadata.Comments.Add("Comment2");
    metadata.Contig = "Contig Info";
    metadata.DBLink = new CrossReferenceLink();
    metadata.DBLink.Type = CrossReferenceType.Project;
    metadata.DBLink.Numbers.Add("100");
    metadata.DBLink.Numbers.Add("200");
    metadata.DBSource = "DbSourceInfo";
    metadata.Definition = "Defination info";

    // Three features; the last two intentionally share key, location and qualifier.
    metadata.Features = new SequenceFeatures();
    string[][] featureSpecs =
    {
        new string[] { "feature1", "1", "qualifier1" },
        new string[] { "feature2", "2", "qualifier2" },
        new string[] { "feature2", "2", "qualifier2" }
    };
    foreach (string[] spec in featureSpecs)
    {
        FeatureItem feature = new FeatureItem(spec[0], spec[1]);
        List<string> qualifierValues = new List<string>();
        qualifierValues.Add(spec[2] + "value1");
        qualifierValues.Add(spec[2] + "value2");
        feature.Qualifiers.Add(spec[2], qualifierValues);
        metadata.Features.All.Add(feature);
    }

    metadata.Keywords = "keywords data";
    metadata.Locus = new GenBankLocusInfo();
    // Fixed date keeps the test deterministic.
    metadata.Locus.Date = new DateTime(2010, 1, 15);
    metadata.Locus.DivisionCode = SequenceDivisionCode.CON;
    metadata.Locus.MoleculeType = MoleculeType.DNA;
    metadata.Locus.Name = "LocusName";
    metadata.Locus.SequenceLength = 100;
    metadata.Locus.SequenceType = "bp";
    metadata.Locus.Strand = SequenceStrandType.Double;
    metadata.Locus.StrandTopology = SequenceStrandTopology.Linear;
    metadata.Origin = "origin info";
    metadata.Primary = "Primary info";
    metadata.Project = new ProjectIdentifier();
    metadata.Project.Name = "Project1";
    metadata.Project.Numbers.Add("101");
    metadata.Project.Numbers.Add("201");

    // Two references that differ only in Number (1, 2) and Location ("3", "4").
    for (int number = 1; number <= 2; number++)
    {
        CitationReference reference = new CitationReference();
        reference.Authors = "Authors";
        reference.Consortiums = "Consortiums";
        reference.Journal = "Journal";
        reference.Location = number == 1 ? "3" : "4";
        reference.Medline = "Medline info";
        reference.Number = number;
        reference.PubMed = "pubmid";
        reference.Remarks = "remarks";
        reference.Title = "Title of the book";
        metadata.References.Add(reference);
    }

    metadata.Segment = new SequenceSegment();
    metadata.Segment.Count = 2;
    metadata.Segment.Current = 1;
    metadata.Source = new SequenceSource();
    metadata.Source.CommonName = "ABC Xyz";
    metadata.Source.Organism.Genus = "ABC";
    metadata.Source.Organism.Species = "Xyz";
    metadata.Source.Organism.ClassLevels = "123 123";
    metadata.Version = new GenBankVersion();
    metadata.Version.Accession = "PAccession";
    metadata.Version.Version = "1";
    metadata.Version.GINumber = "12345";

    GenBankMetadata clonemetadta = metadata.Clone();

    Assert.AreEqual(metadata.Accession.Primary, clonemetadta.Accession.Primary);
    Assert.AreEqual(metadata.Accession.Secondary.Count, clonemetadta.Accession.Secondary.Count);
    for (int i = 0; i < clonemetadta.Accession.Secondary.Count; i++)
    {
        Assert.AreEqual(metadata.Accession.Secondary[i], clonemetadta.Accession.Secondary[i]);
    }

    Assert.AreEqual(metadata.BaseCount, clonemetadta.BaseCount);
    Assert.AreEqual(metadata.Comments.Count, clonemetadta.Comments.Count);
    for (int i = 0; i < clonemetadta.Comments.Count; i++)
    {
        Assert.AreEqual(metadata.Comments[i], clonemetadta.Comments[i]);
    }

    Assert.AreEqual(metadata.Contig, clonemetadta.Contig);
    Assert.AreEqual(metadata.DBLink.Type, clonemetadta.DBLink.Type);
    Assert.AreEqual(metadata.DBLink.Numbers.Count, clonemetadta.DBLink.Numbers.Count);
    for (int i = 0; i < clonemetadta.DBLink.Numbers.Count; i++)
    {
        Assert.AreEqual(metadata.DBLink.Numbers[i], clonemetadta.DBLink.Numbers[i]);
    }

    Assert.AreEqual(metadata.DBSource, clonemetadta.DBSource);
    Assert.AreEqual(metadata.Definition, clonemetadta.Definition);

    Assert.AreEqual(metadata.Features.All.Count, clonemetadta.Features.All.Count);
    for (int i = 0; i < clonemetadta.Features.All.Count; i++)
    {
        FeatureItem expectedFeature = metadata.Features.All[i];
        FeatureItem actualFeature = clonemetadta.Features.All[i];

        Assert.AreEqual(expectedFeature.Key, actualFeature.Key);
        Assert.AreEqual(
            locBuilder.GetLocationString(expectedFeature.Location),
            locBuilder.GetLocationString(actualFeature.Location));
        Assert.AreEqual(expectedFeature.Qualifiers.Count, actualFeature.Qualifiers.Count);

        foreach (KeyValuePair<string, List<string>> kvp in actualFeature.Qualifiers)
        {
            List<string> expectedValues;
            if (!expectedFeature.Qualifiers.TryGetValue(kvp.Key, out expectedValues))
            {
                Assert.Fail();
            }

            if (kvp.Value == null)
            {
                Assert.IsNull(expectedValues);
                continue;
            }

            Assert.IsNotNull(expectedValues);
            Assert.AreEqual(expectedValues.Count, kvp.Value.Count);
            for (int j = 0; j < kvp.Value.Count; j++)
            {
                Assert.AreEqual(expectedValues[j], kvp.Value[j]);
            }
        }
    }

    Assert.AreEqual(metadata.Keywords, clonemetadta.Keywords);
    Assert.AreEqual(metadata.Locus.Date, clonemetadta.Locus.Date);
    Assert.AreEqual(metadata.Locus.DivisionCode, clonemetadta.Locus.DivisionCode);
    Assert.AreEqual(metadata.Locus.MoleculeType, clonemetadta.Locus.MoleculeType);
    Assert.AreEqual(metadata.Locus.Name, clonemetadta.Locus.Name);
    Assert.AreEqual(metadata.Locus.SequenceLength, clonemetadta.Locus.SequenceLength);
    Assert.AreEqual(metadata.Locus.SequenceType, clonemetadta.Locus.SequenceType);
    Assert.AreEqual(metadata.Locus.Strand, clonemetadta.Locus.Strand);
    Assert.AreEqual(metadata.Locus.StrandTopology, clonemetadta.Locus.StrandTopology);
    // Previously compared the clone's Origin with itself, which could never fail.
    Assert.AreEqual(metadata.Origin, clonemetadta.Origin);
    Assert.AreEqual(metadata.Primary, clonemetadta.Primary);
    Assert.AreEqual(metadata.Project.Name, clonemetadta.Project.Name);
    Assert.AreEqual(metadata.Project.Numbers.Count, clonemetadta.Project.Numbers.Count);
    for (int i = 0; i < clonemetadta.Project.Numbers.Count; i++)
    {
        Assert.AreEqual(metadata.Project.Numbers[i], clonemetadta.Project.Numbers[i]);
    }

    Assert.AreEqual(metadata.References.Count, clonemetadta.References.Count);
    for (int i = 0; i < clonemetadta.References.Count; i++)
    {
        CitationReference expectedReference = metadata.References[i];
        CitationReference actualReference = clonemetadta.References[i];

        Assert.AreEqual(expectedReference.Authors, actualReference.Authors);
        Assert.AreEqual(expectedReference.Consortiums, actualReference.Consortiums);
        Assert.AreEqual(expectedReference.Journal, actualReference.Journal);
        Assert.AreEqual(expectedReference.Location, actualReference.Location);
        Assert.AreEqual(expectedReference.Medline, actualReference.Medline);
        Assert.AreEqual(expectedReference.Number, actualReference.Number);
        Assert.AreEqual(expectedReference.PubMed, actualReference.PubMed);
        Assert.AreEqual(expectedReference.Remarks, actualReference.Remarks);
        Assert.AreEqual(expectedReference.Title, actualReference.Title);
    }

    Assert.AreEqual(metadata.Segment.Current, clonemetadta.Segment.Current);
    Assert.AreEqual(metadata.Segment.Count, clonemetadta.Segment.Count);
    Assert.AreEqual(metadata.Source.CommonName, clonemetadta.Source.CommonName);
    Assert.AreEqual(metadata.Source.Organism.ClassLevels, clonemetadta.Source.Organism.ClassLevels);
    Assert.AreEqual(metadata.Source.Organism.Genus, clonemetadta.Source.Organism.Genus);
    Assert.AreEqual(metadata.Source.Organism.Species, clonemetadta.Source.Organism.Species);
    Assert.AreEqual(metadata.Version.Accession, clonemetadta.Version.Accession);
    Assert.AreEqual(metadata.Version.CompoundAccession, clonemetadta.Version.CompoundAccession);
    Assert.AreEqual(metadata.Version.GINumber, clonemetadta.Version.GINumber);
    Assert.AreEqual(metadata.Version.Version, clonemetadta.Version.Version);
}
/// <summary>
/// Gives out string array of metadata and features just below metadata.
/// </summary>
/// <remarks>
/// Rows are emitted in this order: header, locus, definition, accession, DB link,
/// DB source, version, segment, keywords, source, references, primary, comments,
/// features (each followed by its qualifiers) and base count. Sections whose value is
/// null or blank are skipped, except keywords, which are always written.
/// </remarks>
/// <param name="metadata">GenBank Metadata</param>
/// <returns>string array of metadata</returns>
/// <exception cref="ArgumentNullException"><paramref name="metadata"/> is null.</exception>
public static string[,] GenBankMetadataToRange(GenBankMetadata metadata)
{
    if (metadata == null)
    {
        throw new ArgumentNullException("metadata");
    }

    List<string[]> excelData = new List<string[]>();

    // Add the metadata headers
    excelData.Add(new string[] { Properties.Resources.GenbankMetadataHeader });

    if (metadata.Locus != null)
    {
        excelData.Add(new string[] { Properties.Resources.GenbankMetadataLocus });
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataName, metadata.Locus.Name);
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataSeqLength, metadata.Locus.SequenceLength.ToString());
        // SequenceType may be unset; calling ToString() on it unconditionally would throw.
        AddNameValuePair(
            excelData,
            1,
            Properties.Resources.GenbankMetadataSeqType,
            metadata.Locus.SequenceType == null ? string.Empty : metadata.Locus.SequenceType.ToString());
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataStrandType, Helper.GetStrandType(metadata.Locus.Strand));
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataMoleculeType, metadata.Locus.MoleculeType.ToString());
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataStrandTopology, Helper.GetStrandTopology(metadata.Locus.StrandTopology));
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataDivisionCode, metadata.Locus.DivisionCode.ToString());
        // Format with the invariant culture so the month abbreviation and its upper-casing
        // do not change with the machine's regional settings (e.g. "21-JUN-1999").
        AddNameValuePair(
            excelData,
            1,
            Properties.Resources.GenbankMetadataDate,
            metadata.Locus.Date.ToString("dd-MMM-yyyy", System.Globalization.CultureInfo.InvariantCulture).ToUpperInvariant());
    }

    if (!string.IsNullOrWhiteSpace(metadata.Definition))
    {
        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataDefinition, "", metadata.Definition);
    }

    if (metadata.Accession != null)
    {
        // Primary accession followed by each secondary accession, space separated.
        StringBuilder accessions = new StringBuilder(metadata.Accession.Primary);
        foreach (string accession2 in metadata.Accession.Secondary)
        {
            accessions.Append(" ").Append(accession2);
        }

        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataAccession, "", accessions.ToString());
    }

    if (metadata.DbLink != null)
    {
        // Each number is followed by a comma (including the last one), as before.
        StringBuilder linkNumbers = new StringBuilder();
        foreach (string linkNumber in metadata.DbLink.Numbers)
        {
            linkNumbers.Append(linkNumber).Append(",");
        }

        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataDBLink, "", metadata.DbLink.Type.ToString() + ":" + linkNumbers);
    }

    if (!string.IsNullOrWhiteSpace(metadata.DbSource))
    {
        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataDBSource, "", metadata.DbSource);
    }

    if (metadata.Version != null)
    {
        AddNameValuePair(
            excelData,
            0,
            Properties.Resources.GenbankMetadataVersion,
            "",
            (metadata.Version.Accession ?? string.Empty) + "." +
            (metadata.Version.Version ?? string.Empty) + " " +
            Properties.Resources.GenbankMetadataGI +
            (metadata.Version.GiNumber ?? string.Empty));
    }

    if (metadata.Segment != null)
    {
        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataSegment, "", metadata.Segment.Current + " of " + metadata.Segment.Count);
    }

    AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataKeywords, "", metadata.Keywords);

    if (metadata.Source != null)
    {
        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataSource, "", metadata.Source.CommonName ?? string.Empty);
        AddNameValuePair(
            excelData,
            1,
            Properties.Resources.GenbankMetadataOrganism,
            (metadata.Source.Organism.Genus ?? string.Empty) + " " + (metadata.Source.Organism.Species ?? string.Empty));
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataClassLevels, metadata.Source.Organism.ClassLevels ?? string.Empty);
    }

    foreach (CitationReference reference in metadata.References)
    {
        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataReference, "", reference.Number.ToString() + " (" + reference.Location + ")");
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataAuthors, reference.Authors);
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataTitle, reference.Title);
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataJournal, reference.Journal);
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataConsortiums, reference.Consortiums);
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataMedLine, reference.Medline);
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataPubMed, reference.PubMed);
        AddNameValuePair(excelData, 1, Properties.Resources.GenbankMetadataRemarks, reference.Remarks);
    }

    if (!string.IsNullOrWhiteSpace(metadata.Primary))
    {
        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataPrimary, "", metadata.Primary);
    }

    if (metadata.Comments != null && metadata.Comments.Count > 0)
    {
        // All comments go into a single cell, one per line.
        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataComment, "", string.Join(Environment.NewLine, metadata.Comments));
    }

    if (metadata.Features != null)
    {
        // Add the features header
        excelData.Add(new string[] { Properties.Resources.GenbankFeaturesHeader });

        LocationBuilder locBuilder = new LocationBuilder();
        foreach (FeatureItem featureItem in metadata.Features.All)
        {
            // Feature row: key, one skipped column, then the location string.
            excelData.Add(new string[] { featureItem.Key, "", locBuilder.GetLocationString(featureItem.Location) });

            foreach (KeyValuePair<string, List<string>> qualifier in featureItem.Qualifiers)
            {
                if (qualifier.Value == null)
                {
                    // A qualifier without a value list has nothing to emit; skip it
                    // rather than fail while enumerating null.
                    continue;
                }

                foreach (string value in qualifier.Value)
                {
                    AddNameValuePair(excelData, 1, qualifier.Key, value);
                }
            }
        }
    }

    if (!string.IsNullOrWhiteSpace(metadata.BaseCount))
    {
        AddNameValuePair(excelData, 0, Properties.Resources.GenbankMetadataBaseCount, "", metadata.BaseCount);
    }

    return ConvertToArray(excelData);
}
/// <summary>
/// Extracts features from genBank file downloaded from ncbi.
/// </summary>
/// <param name="id">
/// Accession id of the sequence in ncbi (remote id).
/// </param>
/// <returns>
/// The <see cref="List{FeatureItem}"/>; an empty list when the downloaded record has
/// no metadata or no feature table.
/// </returns>
public static List<FeatureItem> GetFeatures(string id)
{
    GenBankMetadata metadata = GetMetadata(DownloadGenBankSequence(id));

    // Return an empty list rather than throwing NullReferenceException when the
    // metadata or its feature table is missing.
    if (metadata == null || metadata.Features == null)
    {
        return new List<FeatureItem>();
    }

    return metadata.Features.All;
}
/// <summary>
/// Formats a sequence to a temporary GenBank file through a TextWriter, parses that
/// file back and validates the sequence properties and GenBank metadata against the
/// expected values loaded by InitializeXmlVariables.
/// </summary>
public void GenBankFormatterValidateFormatTextWriter()
{
    InitializeXmlVariables();

    // Create a Sequence with all attributes.
    // parse and update the properties instead of parsing entire file.
    ISequenceParser parser1 = new GenBankParser();
    IList<ISequence> seqList1 = parser1.Parse(FilePath);

    string expectedUpdatedSequence =
        ExpectedSequence.Replace("\r", "").Replace("\n", "").Replace(" ", "");
    Sequence orgSeq = new Sequence(Utility.GetAlphabet(AlphabetName), expectedUpdatedSequence);
    orgSeq.Metadata.Add("GenBank", (GenBankMetadata)seqList1[0].Metadata["GenBank"]);
    orgSeq.ID = seqList1[0].ID;
    orgSeq.DisplayID = seqList1[0].DisplayID;
    orgSeq.MoleculeType = seqList1[0].MoleculeType;

    ISequenceFormatter formatter = new GenBankFormatter();

    try
    {
        using (TextWriter writer = new StreamWriter(Constants.GenBankTempFileName))
        {
            formatter.Format(orgSeq, writer);
        }

        // parse
        GenBankParser parserObj = new GenBankParser();
        IList<ISequence> seqList = parserObj.Parse(Constants.GenBankTempFileName);
        ISequence seq = seqList[0];

        // test the non-metadata properties
        if (0 == string.Compare(IsSequenceReadOnly, "true", false, CultureInfo.CurrentCulture))
        {
            Assert.IsTrue(seq.IsReadOnly);
            ApplicationLog.WriteLine(
                "GenBank Formatter BVT: Successfully validated the ReadOnly Property");
        }

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
        Assert.AreEqual(Utility.GetMoleculeType(MolType), seq.MoleculeType);
        Assert.AreEqual(SeqId, seq.DisplayID);
        Assert.AreEqual(SeqId, seq.ID);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // test the metadata that is tricky to parse, and will not be tested implicitly by
        // testing the formatting
        GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        Assert.AreEqual(
            StrandTopology.ToUpper(CultureInfo.CurrentCulture),
            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
        Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
        Assert.AreEqual(PrimaryId, metadata.Version.GINumber);
        ApplicationLog.WriteLine(
            "GenBank Formatter BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // test the sequence string
        Assert.AreEqual(ExpectedSequence, seq.ToString());
        ApplicationLog.WriteLine("GenBank Formatter BVT: Successfully validated the Sequence");
        Console.WriteLine(string.Format((IFormatProvider)null,
            "GenBank Formatter BVT: Successfully validated the Sequence '{0}'",
            ExpectedSequence));
    }
    finally
    {
        // Remove the temporary file even when an assertion above fails, so a failing
        // run does not leave it behind for later tests.
        File.Delete(Constants.GenBankTempFileName);
    }
}
/// <summary>
/// Creates one collection item for every BLAST HSP of the query at <paramref name="seqPos"/>
/// that passes the user-defined thresholds, or a single placeholder item for a search
/// record in which no HSP qualified.
/// </summary>
/// <remarks>
/// For each qualifying HSP the matching GenBank file is parsed and the facets are filled
/// from it. If anything in that step throws, the item is instead filled with the
/// error-path facets and its link is set to "#".
/// </remarks>
/// <param name="Up">Supplies the project directory, BLAST thresholds and facet categories.</param>
/// <param name="rec">Query sequence; its DisplayID provides the item name and description.</param>
/// <param name="itemId">Id to assign to the first item created.</param>
/// <param name="seqPos">Position of the query; used as the BLAST report file name.</param>
/// <param name="collection">Collection that receives the created items.</param>
/// <returns>The next unused item id.</returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();
    int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

    // Iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;
            int rank = 0;
            int hitCount = record.Hits.Count();

            // For each hit
            for (int i = 0; i < hitCount; i++)
            {
                Hit blastHit = record.Hits[i];
                int hspCount = blastHit.Hsps.Count();

                // For each HSP
                for (int j = 0; j < hspCount; j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // Skip HSPs that fail the user-defined thresholds.
                    bool passesThresholds =
                        (percentId >= Up.BlastMinPercentIdentity) &&
                        (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                        (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                        (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    rank += 1;

                    // Relation of this HSP's score to the first HSP of the next hit.
                    string nextScore;
                    if ((i + 1) < hitCount)
                    {
                        nextScore = blastHsp.Score > record.Hits[i + 1].Hsps[0].Score ? "less than" : "equal";
                    }
                    else
                    {
                        nextScore = "non existent";
                    }

                    // Parse the GI number from the hit id and derive the GenBank file name.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString()
                        + "_" + gitem.HitStart.ToString()
                        + "_" + gitem.HitEnd.ToString()
                        + ".gb";

                    // Init item
                    string img = "#" + itemId.ToString();
                    Item item = new Item(itemId, img);
                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                    item.Name = headerTokens[0];
                    item.Description = headerTokens[1];

                    // Write pairwise alignment
                    writePairwiseAlignment(Up, blastHit, j, itemId);

                    // Try to parse the GB record associated with the hit and set facet
                    // values from the BLAST/GB record.
                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        string ncbiUrl = GetNCBIUrl(Up.BlastProgram);
                        item.Href = ncbiUrl + GetGenBankIdentifier(gbRecord);

                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);

                        // Collect facets locally and attach them only once all of them were
                        // built. Otherwise a failure half-way through would leave partial
                        // facets on the item and the error path would add a second set.
                        List<Facet> facets = new List<Facet>();
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, nextScore);
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, ResolveAnnotatedFacetValue(annotatedIndex, i, j, bestItem != null));
                                    break;
                                default:
                                    facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, ncbiUrl, bestItem, rank);
                                    break;
                            }

                            facets.Add(facet);
                        }

                        foreach (Facet facet in facets)
                        {
                            item.Facets.Add(facet);
                        }
                    }
                    catch (Exception ex)
                    {
                        // If parsing failed, init the item with default values
                        // (similar to the 'no hit' case below).
                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        Console.WriteLine("GB ERROR: " + ex.Message);
                        item.Href = "#";

                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, "no");
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, ResolveAnnotatedFacetValue(annotatedIndex, i, j, false));
                                    break;
                                default:
                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                    break;
                            }

                            item.Facets.Add(facet);
                        }
                    }

                    // Add item to collection, increment to next item.
                    collection.Items.Add(item);
                    hitsProcessed += 1;
                    itemId += 1;
                }
            }

            // No hit at all, or no HSP passed the thresholds: emit a placeholder item.
            if (hitsProcessed == 0)
            {
                // Init Pivot item
                string img = "#" + itemId.ToString();
                Item item = new Item(itemId, img);
                item.Href = "#";
                string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                item.Name = headerTokens[0];
                item.Description = headerTokens[1];

                // Write pairwise alignment to file.
                writePairwiseAlignment(Up, itemId);

                // Set facet values for each facet category to default values.
                foreach (FacetCategory f in Up.FacetCategories)
                {
                    Facet facet;
                    switch (f.Name)
                    {
                        case "InputOrder":
                            facet = new Facet(f.Name, f.Type, seqPos);
                            break;
                        case "QuerySequence":
                            facet = new Facet(f.Name, f.Type, rec.ToString());
                            break;
                        default:
                            facet = CreateFacet(f.Name, f.Type, record, item, 0);
                            break;
                    }

                    item.Facets.Add(facet);
                }

                // Add item to collection, increment to next item.
                collection.Items.Add(item);
                itemId += 1;
            }
        }
    }

    return itemId;
}

/// <summary>
/// Computes the value of the "Annotated" facet for the HSP at (hitIndex, hspIndex).
/// </summary>
/// <param name="annotatedIndex">Result of GetBestAnnotatedIndex: { hit index, HSP index } or { -1, -1 }.</param>
/// <param name="hitIndex">Index of the hit being processed.</param>
/// <param name="hspIndex">Index of the HSP within that hit.</param>
/// <param name="hasFeature">Whether a best feature item was found for this HSP's GenBank record.</param>
/// <returns>"top_annotated", "top_unannotated", "annotated" or "unannotated".</returns>
private static string ResolveAnnotatedFacetValue(int[] annotatedIndex, int hitIndex, int hspIndex, bool hasFeature)
{
    if ((annotatedIndex[0] == hitIndex) && (annotatedIndex[1] == hspIndex))
    {
        return "top_annotated";
    }

    if ((hitIndex == 0) && (hspIndex == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
    {
        return "top_unannotated";
    }

    return hasFeature ? "annotated" : "unannotated";
}
/// <summary>
/// Looks up the first value of a qualifier on the first feature whose key matches
/// <paramref name="featureItemKey"/> and that carries a usable value for it.
/// </summary>
/// <param name="featureItemKey">Feature key to match.</param>
/// <param name="qualifierKey">Qualifier key to look up on matching features.</param>
/// <param name="gbMeta">GenBank metadata whose features are searched.</param>
/// <returns>
/// The qualifier's first value with surrounding double quotes trimmed, or "N/A" when
/// no matching feature has a value for the qualifier.
/// </returns>
public static string GetQualifierString(string featureItemKey, string qualifierKey, GenBankMetadata gbMeta)
{
    Console.WriteLine("In GetQualifierString: " + featureItemKey + " " + qualifierKey);

    foreach (FeatureItem fi in gbMeta.Features.All)
    {
        if (fi.Key != featureItemKey)
        {
            continue;
        }

        foreach (KeyValuePair<string, List<string>> q in fi.Qualifiers)
        {
            // A qualifier may have a null or empty value list. Indexing [0] blindly
            // would abort the lookup even when it is not the qualifier asked for.
            string firstValue = (q.Value != null && q.Value.Count > 0) ? q.Value[0] : null;
            Console.WriteLine(featureItemKey + " " + q.Key + " " + firstValue);

            if (q.Key == qualifierKey && firstValue != null)
            {
                return firstValue.Trim('"');
            }
        }
    }

    return "N/A";
}
/// <summary>
/// Returns the GI number stored in the version section of the sequence's
/// "GenBank" metadata entry.
/// </summary>
/// <param name="gb">Sequence carrying a "GenBank" metadata entry.</param>
/// <returns>The value of Version.GINumber.</returns>
public static string GetGenBankIdentifier(ISequence gb)
{
    return ((GenBankMetadata)gb.Metadata["GenBank"]).Version.GINumber;
}
/// <summary>
/// Construct initialization: concatenates the fragments into one DNA sequence with
/// GenBank metadata and creates a forward and a reverse overlap for every fragment.
/// </summary>
/// <param name="fragList">
/// Fragment list. Note that it is modified: a copy of the first fragment is appended
/// and the first element is removed before the reverse overlaps are built.
/// </param>
/// <param name="settings">
/// Designer settings; MaxOverlapLen and MaxGeneSpecificLen cap the lengths of the
/// overlapping and gene-specific parts.
/// </param>
private void Init(ObservableCollection<Fragment> fragList, DesignerSettings settings)
{
    this.Overlaps = new List<Overlap>();
    this.Settings = settings;

    // Forward pass: walk the fragments in order, growing the assembled sequence.
    String assembled = "";
    String locusName = "";
    List<MiscFeature> geneFeatures = new List<MiscFeature>();

    for (int idx = 0; idx < fragList.Count; idx++)
    {
        locusName += fragList[idx].Name;
        String current = fragList[idx].GetString();

        // Overlap comes from the tail of what precedes this fragment,
        // the gene-specific part from the head of the fragment itself.
        int overlapLen = Math.Min(settings.MaxOverlapLen, assembled.Length);
        int specificLen = Math.Min(settings.MaxGeneSpecificLen, current.Length);
        String overlapPart = assembled.Substring(assembled.Length - overlapLen, overlapLen);
        String specificPart = current.Substring(0, specificLen);

        // 1-based location of this fragment inside the assembled sequence.
        String span = (assembled.Length + 1).ToString() + ".." + (assembled.Length + current.Length).ToString();
        MiscFeature geneFeature = new MiscFeature(span);
        geneFeature.StandardName = fragList[idx].Name;
        geneFeatures.Add(geneFeature);

        assembled += current;

        // The first fragment has nothing before it, so it gets no overlapping part.
        Overlap forward = (idx == 0)
            ? new Overlap(fragList[idx].Name + "_fwd", new Sequence(Alphabets.DNA, specificPart))
            : new Overlap(fragList[idx].Name + "_fwd", new Sequence(Alphabets.DNA, overlapPart), new Sequence(Alphabets.DNA, specificPart));
        Overlaps.Add(forward);
    }

    this.Sequence = new Sequence(Alphabets.DNA, assembled);

    // GenBank metadata describing the assembled construct.
    GenBankLocusInfo locus = new GenBankLocusInfo();
    locus.MoleculeType = MoleculeType.DNA;
    locus.Name = locusName;
    locus.Date = System.DateTime.Now;
    locus.SequenceLength = assembled.Length;

    GenBankMetadata meta = new GenBankMetadata();
    meta.Locus = locus;
    meta.Comments.Add("designed with mufasa");
    meta.Definition = "synthetic construct";
    meta.Features = new SequenceFeatures();
    meta.Features.All.AddRange(geneFeatures);
    this.Sequence.Metadata.Add("GenBank", meta);

    // Reverse pass: move (a copy of) the first fragment to the end, then walk the
    // list backwards over the reverse complements.
    fragList.Add(new Fragment(fragList[0]));
    fragList.RemoveAt(0);

    String reverseAssembled = "";
    int lastIndex = fragList.Count - 1;
    for (int idx = lastIndex; idx >= 0; idx--)
    {
        String reverseCurrent = fragList[idx].GetReverseComplementString();

        int overlapLen = Math.Min(settings.MaxOverlapLen, reverseAssembled.Length);
        int specificLen = Math.Min(settings.MaxGeneSpecificLen, reverseCurrent.Length);
        String overlapPart = reverseAssembled.Substring(reverseAssembled.Length - overlapLen, overlapLen);
        String specificPart = reverseCurrent.Substring(0, specificLen);

        reverseAssembled += reverseCurrent;

        // The fragment processed first in this pass has no overlapping part.
        Overlap reverse = (idx == lastIndex)
            ? new Overlap(fragList[idx].Name + "_rev", new Sequence(Alphabets.DNA, specificPart))
            : new Overlap(fragList[idx].Name + "_rev", new Sequence(Alphabets.DNA, overlapPart), new Sequence(Alphabets.DNA, specificPart));
        Overlaps.Add(reverse);
    }

    TermoOptimizeOverlaps();
}
/// <summary>
/// Creates the facet named <paramref name="fName"/> for one BLAST HSP, taking its value
/// from the BLAST record, the GenBank metadata of the hit sequence or the best feature item.
/// </summary>
/// <param name="fName">Facet category name; selects which value is produced.</param>
/// <param name="fType">Facet type, passed through to the created <c>Facet</c>.</param>
/// <param name="rec">BLAST search record containing the hit.</param>
/// <param name="hitId">Index of the hit inside <c>rec.Hits</c>.</param>
/// <param name="hspId">Index of the HSP inside the hit's <c>Hsps</c>.</param>
/// <param name="gb">Sequence whose <c>Metadata["GenBank"]</c> entry holds the GenBank metadata.</param>
/// <param name="item">Item providing the query name and the id used for the alignment text link.</param>
/// <param name="NCBIurl">URL prefix to which the GI number or accession is appended.</param>
/// <param name="bestItem">Feature item used for the gene/product/function facets; may be null.</param>
/// <param name="rank">Value of the "Rank" facet.</param>
/// <returns>The facet for the requested category.</returns>
/// <exception cref="ArgumentException">No facet category with the given name exists.</exception>
public static Facet CreateFacet(string fName, string fType, BlastSearchRecord rec, int hitId, int hspId, ISequence gb, Item item, string NCBIurl, FeatureItem bestItem, int rank)
{
    Hit hit = rec.Hits[hitId];
    Hsp hsp = hit.Hsps[hspId];
    GenBankMetadata gbMeta = (GenBankMetadata)gb.Metadata["GenBank"];

    // Facet for the n-th (0-based) ';'-separated entry of the organism class levels, "N/A" when absent.
    Func<int, Facet> classLevelFacet = level =>
    {
        string[] classLevels = gbMeta.Source.Organism.ClassLevels.Split(';');
        return new Facet(fName, fType, classLevels.Length > level ? classLevels[level] : "N/A");
    };

    // Facet for a qualifier of the "Protein" feature, falling back to the best feature item.
    Func<string, Facet> qualifierFacet = qualifierKey =>
    {
        Facet facet = new Facet(fName, fType, GetQualifierString("Protein", qualifierKey, gbMeta));
        if (facet[0].Value == "N/A" && bestItem != null)
        {
            facet = new Facet(fName, fType, GetQualifierStringFromCDS(bestItem, qualifierKey));
        }
        return facet;
    };

    switch (fName)
    {
        case "QueryName":
            return new Facet(fName, fType, item.Name);

        case "QueryLen":
            return new Facet(fName, fType, rec.IterationQueryLength);

        case "Rank":
            return new Facet(fName, fType, rank);

        case "Score":
            return new Facet(fName, fType, Math.Round(hsp.BitScore, 1));

        case "Identity":
            // Percentage of identical positions over the alignment length.
            double pi = (hsp.IdentitiesCount / (double)hsp.AlignmentLength) * 100.0;
            return new Facet(fName, fType, Math.Round(pi, 0));

        case "Span":
            // Percentage of the query covered by the HSP.
            double sp = ((hsp.QueryEnd - hsp.QueryStart + 1) / (double)rec.IterationQueryLength) * 100.0;
            return new Facet(fName, fType, Math.Round(sp, 0));

        case "SubjStart":
            double subjStart = hsp.HitStart;
            return new Facet(fName, fType, Math.Round(subjStart, 0));

        case "SubjLen":
            double subjLen = hit.Length;
            return new Facet(fName, fType, Math.Round(subjLen, 0));

        case "Strand":
            string strand = FrameToStrand(hsp.QueryFrame) + "/" + FrameToStrand(hsp.HitFrame);
            return new Facet(fName, fType, strand);

        case "Species":
            // Only the part of the species text before its first space is used.
            string species = gbMeta.Source.Organism.Species;
            int index = species.IndexOf(" ", StringComparison.Ordinal);
            string speciesName = index > 0 ? species.Substring(0, index) : species;
            return new Facet(fName, fType, gbMeta.Source.Organism.Genus + " " + speciesName);

        case "Kingdom":
            return classLevelFacet(0);

        case "Phylum":
            return classLevelFacet(1);

        case "Class":
            return classLevelFacet(2);

        case "Order":
            return classLevelFacet(3);

        case "Family":
            return classLevelFacet(4);

        case "Lineage":
            return new Facet(fName, fType, gbMeta.Source.Organism.ClassLevels.ToString());

        case "Organism":
            return new Facet(fName, fType, gbMeta.Source.CommonName);

        case "Genus":
            // Last class level, trimmed and without a trailing '.'.
            return new Facet(fName, fType, gbMeta.Source.Organism.ClassLevels.Split(';').Last().Trim().TrimEnd('.'));

        case "Gene":
            string name = "N/A";
            if (bestItem != null)
            {
                string geneSym = "N/A";
                foreach (KeyValuePair<string, List<String>> qualifier in bestItem.Qualifiers)
                {
                    if (qualifier.Key == "gene")
                    {
                        geneSym = qualifier.Value[0].ToString().Trim('"');
                    }
                }

                // NOTE(review): geneSym starts as "N/A", so this condition is also true when no
                // "gene" qualifier exists and the facet then links to a search for "N/A" —
                // confirm whether that is intended.
                if (geneSym != "")
                {
                    name = geneSym;
                    string url2 = System.Web.HttpUtility.HtmlEncode("http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=search&db=gene&term=" + name + "%5Bsym%5D");
                    return new Facet(fName, fType, name, url2);
                }
            }
            return new Facet(fName, fType, name, null);

        case "GI":
            return new Facet(fName, fType, gbMeta.Version.GINumber, NCBIurl + gbMeta.Version.GINumber);

        case "Accession":
            return new Facet(fName, fType, gbMeta.Version.CompoundAccession, NCBIurl + gbMeta.Version.CompoundAccession);

        case "Definition":
            return new Facet(fName, fType, gbMeta.Definition);

        case "EValue":
            return new Facet(fName, fType, String.Format("{0:#e+00}", hsp.EValue));

        case "AlignLen":
            return new Facet(fName, fType, hsp.AlignmentLength, @"txt\" + item.Id + ".txt");

        case "RefCount":
            // Direct submissions and unpublished references are not counted.
            int refCount = gbMeta.References.Count(
                r => (r.Title != "Direct Submission") && (r.Journal != "Unpublished"));
            return new Facet(fName, fType, refCount);

        case "References":
            // One numbered facet value per published reference; an empty facet when there is none.
            Facet referencesFacet = new Facet(fName, fType);
            int number = 1;
            foreach (CitationReference reference in gbMeta.References)
            {
                if (reference.Title != "Direct Submission" && (reference.Journal != "Unpublished"))
                {
                    string url = CreateReferenceURL(reference);
                    referencesFacet.Add(new FacetValue(referencesFacet.Type, String.Format("{0}. {1}. {2}.", number, reference.Title, reference.Journal), url));
                    number++;
                }
            }
            return referencesFacet;

        case "SubmissionDate":
            // Date part of the locus date only, formatted round-trip ("o") after conversion to UTC.
            DateTime dt = new DateTime(gbMeta.Locus.Date.Year, gbMeta.Locus.Date.Month, gbMeta.Locus.Date.Day);
            return new Facet(fName, fType, dt.ToUniversalTime().ToString("o"));

        case "Product":
            return qualifierFacet("product");

        case "Function":
            return qualifierFacet("function");

        default:
            throw new ArgumentException("Facet category with name = " + fName + " does not exist.");
    }
}
/// <summary>
/// Searches NCBI with the given query, imports every found sequence that is not yet in the
/// database and returns one import result per accession.
/// </summary>
/// <param name="searchQuery">Query passed to the NCBI search term formatter.</param>
/// <param name="importGenes">If true, features are imported for every newly created sequence.</param>
/// <param name="importPartial">Forwarded unchanged to the ESummary request.</param>
/// <param name="filterMinLength">If true, <paramref name="minLength"/> is used as lower length bound.</param>
/// <param name="minLength">Lower length bound; ignored unless <paramref name="filterMinLength"/> is set.</param>
/// <param name="filterMaxLength">If true, <paramref name="maxLength"/> is used as upper length bound.</param>
/// <param name="maxLength">Upper length bound; ignored unless <paramref name="filterMaxLength"/> is set.</param>
/// <returns>
/// The result of <c>CreateTask</c>; the task yields a dictionary whose "data" entry is the
/// JSON-serialized list of import results.
/// </returns>
public ActionResult Index(
    string searchQuery,
    bool importGenes,
    bool importPartial,
    bool filterMinLength,
    int minLength,
    bool filterMaxLength,
    int maxLength)
{
    return CreateTask(() =>
    {
        string searchTerm;
        if (filterMinLength)
        {
            searchTerm = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength);
        }
        else
        {
            // Only an upper bound was requested: use 1 as the lower bound.
            searchTerm = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength: 1, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery);
        }

        List<NuccoreObject> nuccoreObjects = NcbiHelper.ExecuteESummaryRequest(searchTerm, importPartial);

        // Accessions without their version suffix, each only once.
        string[] accessions = nuccoreObjects.Select(no => no.AccessionVersion.Split('.')[0]).Distinct().ToArray();
        var importResults = new List<MatterImportResult>(accessions.Length);

        using (var db = new LibiadaWebEntities())
        {
            var matterRepository = new MatterRepository(db);
            var dnaSequenceRepository = new GeneticSequenceRepository(db);

            var (existingAccessions, accessionsToImport) = dnaSequenceRepository.SplitAccessionsIntoExistingAndNotImported(accessions);

            importResults.AddRange(existingAccessions.ConvertAll(existingAccession => new MatterImportResult
            {
                MatterName = existingAccession,
                Result = "Sequence already exists",
                Status = "Exists"
            }));

            foreach (string accession in accessionsToImport)
            {
                var importResult = new MatterImportResult() { MatterName = accession };

                try
                {
                    ISequence bioSequence = NcbiHelper.DownloadGenBankSequence(accession);
                    GenBankMetadata metadata = NcbiHelper.GetMetadata(bioSequence);

                    // Intermediate name, so a failure while creating the matter is still
                    // reported under the full accession.
                    importResult.MatterName = metadata.Version.CompoundAccession;

                    Matter matter = matterRepository.CreateMatterFromGenBankMetadata(metadata);

                    importResult.SequenceType = matter.SequenceType.GetDisplayValue();
                    importResult.Group = matter.Group.GetDisplayValue();
                    importResult.MatterName = matter.Name;
                    importResult.AllNames = $"Common name = {metadata.Source.CommonName}, "
                                          + $"Species = {metadata.Source.Organism.Species}, "
                                          + $"Definition = {metadata.Definition}, "
                                          + $"Saved matter name = {importResult.MatterName}";

                    var sequence = new CommonSequence
                    {
                        Matter = matter,
                        Notation = Notation.Nucleotides,
                        RemoteDb = RemoteDb.GenBank,
                        RemoteId = metadata.Version.CompoundAccession
                    };

                    // Invariant casing: a culture-sensitive ToLower() can miss "PARTIAL"
                    // under cultures with special casing rules for 'I' (e.g. Turkish).
                    bool partial = metadata.Definition.ToLowerInvariant().Contains("partial");
                    dnaSequenceRepository.Create(sequence, bioSequence, partial);

                    (importResult.Result, importResult.Status) = importGenes
                        ? ImportFeatures(metadata, sequence)
                        : ("Successfully imported sequence", "Success");
                }
                catch (Exception exception)
                {
                    importResult.Status = "Error";
                    importResult.Result = $"Error: {exception.Message}";

                    // Append the messages of the whole inner-exception chain.
                    for (Exception inner = exception.InnerException; inner != null; inner = inner.InnerException)
                    {
                        importResult.Result += $" {inner.Message}";
                    }

                    // Detach every tracked entity so the failed import does not affect the next one.
                    foreach (var dbEntityEntry in db.ChangeTracker.Entries())
                    {
                        if (dbEntityEntry.Entity != null)
                        {
                            dbEntityEntry.State = EntityState.Detached;
                        }
                    }
                }
                finally
                {
                    importResults.Add(importResult);
                }
            }

            string[] names = importResults.Select(r => r.MatterName).ToArray();

            // removing matters for which adding of sequence failed
            Matter[] orphanMatters = db.Matter
                                       .Include(m => m.Sequence)
                                       .Where(m => names.Contains(m.Name) && m.Sequence.Count == 0)
                                       .ToArray();

            if (orphanMatters.Length > 0)
            {
                db.Matter.RemoveRange(orphanMatters);
                db.SaveChanges();
            }
        }

        var result = new Dictionary<string, object> { { "result", importResults } };

        return new Dictionary<string, string> { { "data", JsonConvert.SerializeObject(result) } };
    });
}
/// <summary>
/// Parses the GenBank file configured under the simple GenBank node and validates the
/// alphabet, id, locus information, version information and sequence string of the
/// first parsed sequence against the expected values from the XML configuration.
/// </summary>
public void GenBankParserValidateParseOneWithSpecificFormats()
{
    InitializeXmlVariables();

    // Initialization of xml strings.
    FilePath = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.FilePathNode);
    AlphabetName = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.AlphabetNameNode);
    SeqId = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.SequenceIdNode);
    StrandTopology = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.StrandTopologyNode);
    StrandType = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.StrandTypeNode);
    Div = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.DivisionNode);
    Version = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.VersionNode);
    SequenceDate = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.DateNode);
    PrimaryId = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.PrimaryIdNode);
    ExpectedSequence = utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleGenBankPrimaryNode, Constants.ExpectedSequenceNode);

    // parse
    using (ISequenceParser parserObj = new GenBankParser(FilePath))
    {
        parserObj.Alphabet = Alphabets.Protein;

        // Fetch the first sequence exactly once; calling ElementAt(0) on the enumerable
        // for every assertion would enumerate the parser result again each time.
        ISequence firstSequence = parserObj.Parse().ElementAt(0);

        Assert.AreEqual(Utility.GetAlphabet(AlphabetName), firstSequence.Alphabet);
        Assert.AreEqual(SeqId, firstSequence.ID);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

        // test the metadata that is tricky to parse, and will not be tested implicitly by
        // testing the formatting
        GenBankMetadata metadata = (GenBankMetadata)firstSequence.Metadata["GenBank"];

        // The strand type is only compared when the parser reported one.
        if (metadata.Locus.Strand != SequenceStrandType.None)
        {
            Assert.AreEqual(StrandType, metadata.Locus.Strand.ToString());
        }

        Assert.AreEqual(
            StrandTopology.ToUpper(CultureInfo.CurrentCulture),
            metadata.Locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture));
        Assert.AreEqual(Div, metadata.Locus.DivisionCode.ToString());
        Assert.AreEqual(DateTime.Parse(SequenceDate, null), metadata.Locus.Date);
        Assert.AreEqual(Version, metadata.Version.Version.ToString((IFormatProvider)null));
        Assert.AreEqual(PrimaryId, metadata.Version.GiNumber);
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

        // test the sequence string
        Assert.AreEqual(ExpectedSequence, new string(firstSequence.Select(a => (char)a).ToArray()));
        ApplicationLog.WriteLine(
            "GenBank Parser BVT: Successfully validated the Sequence");
        Console.WriteLine(string.Format((IFormatProvider)null,
            "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
            ExpectedSequence));
    }
}
/// <summary>
/// Serializes a populated <c>GenBankMetadata</c> with <c>BinaryFormatter</c> into the file
/// "GenbankMetadata.data", reads it back and verifies that the deserialized copy matches
/// the original.
/// </summary>
/// <remarks>
/// No catch-all handler is used: an unexpected exception or a failed assertion reaches the
/// test framework with its original message and stack trace.
/// NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data; it is kept here
/// because it is the subject of this test.
/// </remarks>
public void TestGenBankMetadataWithBinaryFormatter()
{
    LocationBuilder locBuilder = new LocationBuilder();

    using (Stream stream = File.Open("GenbankMetadata.data", FileMode.Create))
    {
        BinaryFormatter formatter = new BinaryFormatter();
        GenBankMetadata metadata = AddGenBankMetadata(new GenBankMetadata());

        // Round trip through the stream.
        formatter.Serialize(stream, metadata);
        stream.Seek(0, SeekOrigin.Begin);
        GenBankMetadata deserializedMetadata = (GenBankMetadata)formatter.Deserialize(stream);

        Assert.AreEqual(metadata.Accession.Primary, deserializedMetadata.Accession.Primary);

        // Counts are asserted before every element-wise comparison; otherwise a deserialized
        // collection that lost its items would pass without a single comparison.
        Assert.AreEqual(metadata.Accession.Secondary.Count, deserializedMetadata.Accession.Secondary.Count,
            "Secondary accession count differs.");
        for (int i = 0; i < deserializedMetadata.Accession.Secondary.Count; i++)
        {
            Assert.AreEqual(metadata.Accession.Secondary[i], deserializedMetadata.Accession.Secondary[i]);
        }

        Assert.AreEqual(metadata.BaseCount, deserializedMetadata.BaseCount);

        Assert.AreEqual(metadata.Comments.Count, deserializedMetadata.Comments.Count,
            "Comment count differs.");
        for (int i = 0; i < deserializedMetadata.Comments.Count; i++)
        {
            Assert.AreEqual(metadata.Comments[i], deserializedMetadata.Comments[i]);
        }

        Assert.AreEqual(metadata.Contig, deserializedMetadata.Contig);
        Assert.AreEqual(metadata.DBLink.Type, deserializedMetadata.DBLink.Type);

        Assert.AreEqual(metadata.DBLink.Numbers.Count, deserializedMetadata.DBLink.Numbers.Count,
            "DBLink number count differs.");
        for (int i = 0; i < deserializedMetadata.DBLink.Numbers.Count; i++)
        {
            Assert.AreEqual(metadata.DBLink.Numbers[i], deserializedMetadata.DBLink.Numbers[i]);
        }

        Assert.AreEqual(metadata.DBSource, deserializedMetadata.DBSource);
        Assert.AreEqual(metadata.Definition, deserializedMetadata.Definition);

        Assert.AreEqual(metadata.Features.All.Count, deserializedMetadata.Features.All.Count,
            "Feature count differs.");
        for (int i = 0; i < deserializedMetadata.Features.All.Count; i++)
        {
            FeatureItem expectedFeature = metadata.Features.All[i];
            FeatureItem actualFeature = deserializedMetadata.Features.All[i];

            Assert.AreEqual(expectedFeature.Key, actualFeature.Key);
            Assert.AreEqual(
                locBuilder.GetLocationString(expectedFeature.Location),
                locBuilder.GetLocationString(actualFeature.Location));

            Assert.AreEqual(expectedFeature.Qualifiers.Count, actualFeature.Qualifiers.Count,
                "Qualifier count differs for feature " + i + ".");
            foreach (KeyValuePair<string, List<string>> kvp in actualFeature.Qualifiers)
            {
                List<string> expectedValues;
                if (!expectedFeature.Qualifiers.TryGetValue(kvp.Key, out expectedValues))
                {
                    Assert.Fail("Unexpected qualifier '" + kvp.Key + "' in deserialized feature " + i + ".");
                }

                if (kvp.Value == null)
                {
                    Assert.IsNull(expectedValues);
                    continue;
                }

                Assert.IsNotNull(expectedValues);
                Assert.AreEqual(expectedValues.Count, kvp.Value.Count,
                    "Value count differs for qualifier '" + kvp.Key + "'.");
                for (int j = 0; j < kvp.Value.Count; j++)
                {
                    Assert.AreEqual(expectedValues[j], kvp.Value[j]);
                }
            }
        }

        ValidateDeserializedMetadata(deserializedMetadata, metadata);
    }
}
/// <summary>
/// Parses one sequence from the GenBank file configured under the simple GenBank node and
/// compares its alphabet, molecule type, ids, locus/version metadata and sequence string
/// with the expected values read from the XML configuration.
/// </summary>
public void GenBankParserValidateParseOneWithSpecificFormats()
{
    InitializeXmlVariables();

    // Initialization of xml strings; every value lives under the simple GenBank primary node.
    Func<string, string> readNode = nodeName =>
        _utilityObj._xmlUtil.GetTextValue(Constants.SimpleGenBankPrimaryNode, nodeName);

    FilePath = readNode(Constants.FilePathNode);
    AlphabetName = readNode(Constants.AlphabetNameNode);
    MolType = readNode(Constants.MoleculeTypeNode);
    IsSequenceReadOnly = readNode(Constants.IsReadOnlyNode);
    SeqId = readNode(Constants.SequenceIdNode);
    StrandTopology = readNode(Constants.StrandTopologyNode);
    StrandType = readNode(Constants.StrandTypeNode);
    Div = readNode(Constants.DivisionNode);
    Version = readNode(Constants.VersionNode);
    SequenceDate = readNode(Constants.DateNode);
    PrimaryId = readNode(Constants.PrimaryIdNode);
    ExpectedSequence = readNode(Constants.ExpectedSequenceNode);

    // Parse with an explicitly configured alphabet and encoding.
    BasicSequenceParser parserObj = new GenBankParser();
    parserObj.Alphabet = Alphabets.Protein;
    parserObj.Encoding = NcbiEAAEncoding.Instance;
    ISequence parsed = parserObj.ParseOne(FilePath);

    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), parsed.Alphabet);
    Assert.AreEqual(Utility.GetMoleculeType(MolType), parsed.MoleculeType);
    Assert.AreEqual(SeqId, parsed.DisplayID);
    Assert.AreEqual(SeqId, parsed.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // Metadata that is tricky to parse and is not covered implicitly by the
    // formatting tests.
    GenBankMetadata genBankMetadata = (GenBankMetadata)parsed.Metadata["GenBank"];
    GenBankLocusInfo locus = genBankMetadata.Locus;

    // The strand type is only compared when the parser reported one.
    bool hasStrand = locus.Strand != SequenceStrandType.None;
    if (hasStrand)
    {
        Assert.AreEqual(StrandType, locus.Strand.ToString());
    }

    string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
    string actualTopology = locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedTopology, actualTopology);
    Assert.AreEqual(Div, locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), locus.Date);
    Assert.AreEqual(Version, genBankMetadata.Version.Version.ToString((IFormatProvider)null));
    Assert.AreEqual(PrimaryId, genBankMetadata.Version.GINumber);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // Finally the sequence string itself.
    Assert.AreEqual(ExpectedSequence, parsed.ToString());
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Sequence");

    string consoleMessage = string.Format((IFormatProvider)null,
        "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
        ExpectedSequence);
    Console.WriteLine(consoleMessage);
}
/// <summary>
/// Fills the given GenBank metadata with fixed sample values for every section
/// (accession, comments, DB link, features, locus, project, references, segment,
/// source and version).
/// </summary>
/// <param name="metadata">GenBank metadata instance to populate.</param>
/// <returns>The same <paramref name="metadata"/> instance, now populated.</returns>
private static GenBankMetadata AddGenBankMetadata(GenBankMetadata metadata)
{
    // Accession
    GenBankAccession accession = new GenBankAccession { Primary = "PAccession" };
    metadata.Accession = accession;
    accession.Secondary.Add("SAccession1");
    accession.Secondary.Add("SAccession2");

    metadata.BaseCount = "a 1 c 2";
    metadata.Comments.Add("Comment1");
    metadata.Comments.Add("Comment2");
    metadata.Contig = "Contig Info";

    // Cross reference link
    CrossReferenceLink dbLink = new CrossReferenceLink { Type = CrossReferenceType.Project };
    metadata.DBLink = dbLink;
    dbLink.Numbers.Add("100");
    dbLink.Numbers.Add("200");

    metadata.DBSource = "DbSourceInfo";
    metadata.Definition = "Defination info";

    // Features: "feature1" once, followed by two identical "feature2" entries.
    metadata.Features = new SequenceFeatures();

    FeatureItem firstFeature = new FeatureItem("feature1", "1");
    firstFeature.Qualifiers.Add("qualifier1", new List<string> { "qualifier1value1", "qualifier1value2" });
    metadata.Features.All.Add(firstFeature);

    for (int copy = 0; copy < 2; copy++)
    {
        FeatureItem secondFeature = new FeatureItem("feature2", "2");
        secondFeature.Qualifiers.Add("qualifier2", new List<string> { "qualifier2value1", "qualifier2value2" });
        metadata.Features.All.Add(secondFeature);
    }

    metadata.Keywords = "keywords data";

    // Locus
    metadata.Locus = new GenBankLocusInfo
    {
        Date = DateTime.Now,
        DivisionCode = SequenceDivisionCode.CON,
        MoleculeType = MoleculeType.DNA,
        Name = "LocusName",
        SequenceLength = 100,
        SequenceType = "bp",
        Strand = SequenceStrandType.Double,
        StrandTopology = SequenceStrandTopology.Linear
    };

    metadata.Origin = "origin info";
    metadata.Primary = "Primary info";

    // Project
    ProjectIdentifier project = new ProjectIdentifier { Name = "Project1" };
    metadata.Project = project;
    project.Numbers.Add("101");
    project.Numbers.Add("201");

    // Two references that differ only in location and number.
    metadata.References.Add(new CitationReference
    {
        Authors = "Authors",
        Consortiums = "Consortiums",
        Journal = "Journal",
        Location = "3",
        Medline = "Medline info",
        Number = 1,
        PubMed = "pubmid",
        Remarks = "remarks",
        Title = "Title of the book"
    });
    metadata.References.Add(new CitationReference
    {
        Authors = "Authors",
        Consortiums = "Consortiums",
        Journal = "Journal",
        Location = "4",
        Medline = "Medline info",
        Number = 2,
        PubMed = "pubmid",
        Remarks = "remarks",
        Title = "Title of the book"
    });

    metadata.Segment = new SequenceSegment { Count = 2, Current = 1 };

    // Source; the organism object is the one already exposed by the new SequenceSource.
    SequenceSource source = new SequenceSource { CommonName = "ABC Xyz" };
    metadata.Source = source;
    source.Organism.Genus = "ABC";
    source.Organism.Species = "Xyz";
    source.Organism.ClassLevels = "123 123";

    metadata.Version = new GenBankVersion
    {
        Accession = "PAccession",
        Version = "1",
        GINumber = "12345"
    };

    return metadata;
}
/// <summary>
/// Method called when the user clicks the Ok button on the export selection dialog.
/// Builds one sequence per selected item (plus quality scores, GenBank metadata or GFF
/// metadata, depending on the formatter in use) and hands the result to the
/// selection-complete callback.
/// In case there was an error parsing, the failing item is marked with an error status,
/// the error message is shown and the selection dialog is displayed again.
/// </summary>
/// <param name="dialog">Dialog which raised this event; used as an <c>ExportSelectionDialog</c>.</param>
private void OnExportSequenceDialogSubmit(ISelectionDialog dialog)
{
    ExportSelectionDialog selectionDialog = dialog as ExportSelectionDialog;
    List<ISequence> parsedSequences = new List<ISequence>();
    List<Range> rangesInCurrentSequenceItem;
    List<InputSequenceItem> sequenceItems = selectionDialog.GetSequences();
    ISequenceFormatter formatterUsed = argsForCallback[0] as ISequenceFormatter;

    try
    {
        foreach (InputSequenceItem currentSequenceItem in sequenceItems)
        {
            try
            {
                ISequence sequenceForCurrentItem = null;

                // Parse sequence
                if (formatterUsed is GffFormatter && string.IsNullOrWhiteSpace(currentSequenceItem.SequenceAddress))
                {
                    // GFF export without a sequence address: use an empty DNA sequence.
                    sequenceForCurrentItem = new Sequence(Alphabets.DNA, "");
                }
                else
                {
                    rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.SequenceAddress);
                    if (rangesInCurrentSequenceItem.Count > 0)
                    {
                        // get from cache with default UI options.
                        sequenceForCurrentItem = SequenceCache.TryGetSequence(rangesInCurrentSequenceItem, selectionDialog.InputParamsAsKey) as ISequence;
                        if (sequenceForCurrentItem == null) // if not in cache
                        {
                            sequenceForCurrentItem = ExcelSelectionParser.RangeToSequence(
                                rangesInCurrentSequenceItem,
                                selectionDialog.TreatBlankCellsAsGaps,
                                selectionDialog.MoleculeType,
                                currentSequenceItem.SequenceName);

                            //added default from UI as auto detect and ignore space
                            SequenceCache.Add(rangesInCurrentSequenceItem, sequenceForCurrentItem, selectionDialog.InputParamsAsKey);
                        }
                        else
                        {
                            // Set the ID
                            sequenceForCurrentItem = SetSequenceID(sequenceForCurrentItem, currentSequenceItem.SequenceName);
                        }
                    }
                    else
                    {
                        // NOTE(review): no ranges were found, so sequenceForCurrentItem stays null
                        // and a null entry is added to parsedSequences below — confirm intended.
                        currentSequenceItem.SetErrorStatus(false);
                    }
                }

                //Parse metadata
                if (formatterUsed is Bio.IO.FastQ.FastQFormatter)
                {
                    rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                    if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                    {
                        sequenceForCurrentItem = ExcelSelectionParser.RangeToQualitativeSequence(rangesInCurrentSequenceItem, sequenceForCurrentItem);
                    }
                }
                else if (formatterUsed is GenBankFormatter)
                {
                    rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                    if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                    {
                        try
                        {
                            GenBankMetadata metadata = ExcelSelectionParser.RangeToGenBankMetadata(rangesInCurrentSequenceItem);
                            sequenceForCurrentItem.Metadata[Helper.GenBankMetadataKey] = metadata;

                            if (string.IsNullOrEmpty(sequenceForCurrentItem.ID))
                            {
                                // Set the ID
                                sequenceForCurrentItem = SetSequenceID(sequenceForCurrentItem, metadata.Locus.Name);
                            }
                        }
                        catch (Exception parseError)
                        {
                            // Same user-facing message as before, but the original failure is
                            // kept as inner exception instead of being discarded.
                            throw new Exception(Properties.Resources.GenbankMetadataParseError, parseError);
                        }
                    }
                }
                else if (formatterUsed is GffFormatter)
                {
                    rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.MetadataAddress);
                    if (rangesInCurrentSequenceItem.Count > 0 && sequenceForCurrentItem != null)
                    {
                        ExcelSelectionParser.RangeToGffMetadata(sequenceForCurrentItem, rangesInCurrentSequenceItem);
                    }
                }

                // Add the parsed sequence to the list of parsed sequences
                parsedSequences.Add(sequenceForCurrentItem);
            }
            catch
            {
                // Set error status on item and re-throw
                currentSequenceItem.SetErrorStatus(true);
                throw;
            }
        }

        // On successful parsing...
        if (inputSequenceSelectionComplete != null)
        {
            inputSequenceSelectionComplete(parsedSequences, this.argsForCallback);
        }

        // NOTE(review): this removes OnInputSequenceDialogSubmit although the running handler
        // is OnExportSequenceDialogSubmit — verify against the place where the handler is attached.
        selectionDialog.InputSelectionDialogSubmitting -= OnInputSequenceDialogSubmit;
        selectionDialog.Close();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, Resources.CAPTION, MessageBoxButtons.OK, MessageBoxIcon.Error);
        selectionDialog.ShowDialog();
    }
}
/// <summary>
/// Returns the first value of the qualifier <paramref name="qualifierKey"/> taken from the
/// first feature with key <paramref name="featureItemKey"/> that carries a value for it.
/// Surrounding double quotes are trimmed from the value.
/// </summary>
/// <param name="featureItemKey">Key of the feature items to inspect.</param>
/// <param name="qualifierKey">Key of the qualifier to read.</param>
/// <param name="gbMeta">GenBank metadata whose features are searched.</param>
/// <returns>The trimmed qualifier value, or "N/A" when no matching feature has one.</returns>
public static string GetQualifierString(string featureItemKey, string qualifierKey, GenBankMetadata gbMeta)
{
    // A null key can never match a qualifier (and must not reach the dictionary lookup).
    if (qualifierKey == null)
    {
        return "N/A";
    }

    foreach (FeatureItem fi in gbMeta.Features.All)
    {
        if (fi.Key != featureItemKey)
        {
            continue;
        }

        // Direct lookup; qualifiers with a missing or empty value list are skipped instead
        // of raising an index error.
        List<string> values;
        if (fi.Qualifiers.TryGetValue(qualifierKey, out values) && values != null && values.Count > 0)
        {
            return values[0].Trim('"');
        }
    }

    return "N/A";
}