/// <summary>
/// Parses the single-protein GenBank test file and NC_001284.gbk and checks the parsed
/// feature tables: per-type feature counts, location-range queries and that the stored
/// translation of the first coding sequence equals the computed one.
/// </summary>
public void GenBankFeatures()
{
    // parse
    ISequenceParser parser = new GenBankParser();
    ISequence seq = parser.ParseOne(_singleProteinSeqGenBankFilename);

    GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;
    // Fail with a readable message instead of a NullReferenceException further down.
    Assert.IsNotNull(metadata, "The \"GenBank\" metadata entry is not a GenBankMetadata instance.");

    // Assert.AreEqual takes (expected, actual); the constants go first so that a
    // failure message reports the two values the right way round.
    List<CodingSequence> codingSequences = metadata.Features.CodingSequences;
    Assert.AreEqual(3, codingSequences.Count);
    Assert.AreEqual(1, codingSequences[0].DatabaseCrossReference.Count);
    Assert.AreEqual(string.Empty, codingSequences[0].GeneSymbol);
    Assert.AreEqual(1, metadata.Features.GetFeatures("source").Count);
    Assert.IsFalse(codingSequences[0].Pseudo);

    // Range queries on the small file.
    Assert.AreEqual(2, metadata.GetFeatures(1, 109).Count);
    Assert.AreEqual(2, metadata.GetFeatures(1, 10).Count);
    Assert.AreEqual(2, metadata.GetFeatures(10, 100).Count);
    Assert.AreEqual(2, metadata.GetFeatures(120, 150).Count);
    Assert.AreEqual(0, metadata.GetCitationsReferredInFeatures().Count);

    // Second, larger file.
    seq = parser.ParseOne(_genBankDataPath + @"\NC_001284.gbk");
    metadata = seq.Metadata["GenBank"] as GenBankMetadata;
    Assert.IsNotNull(metadata, "The \"GenBank\" metadata entry is not a GenBankMetadata instance.");

    Assert.AreEqual(743, metadata.Features.All.Count);
    Assert.AreEqual(117, metadata.Features.CodingSequences.Count);
    Assert.AreEqual(32, metadata.Features.Exons.Count);
    Assert.AreEqual(22, metadata.Features.Introns.Count);
    Assert.AreEqual(60, metadata.Features.Genes.Count);
    Assert.AreEqual(455, metadata.Features.MiscFeatures.Count);
    Assert.AreEqual(17, metadata.Features.Promoters.Count);
    Assert.AreEqual(21, metadata.Features.TransferRNAs.Count);

    // The typed accessor, a manual key filter and GetFeatures(key) must all agree.
    Assert.AreEqual(117, metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);
    Assert.AreEqual(117, metadata.Features.GetFeatures(StandardFeatureKeys.CodingSequence).Count);

    // The translation qualifier is compared after trimming its surrounding quotes.
    Assert.AreEqual(
        metadata.Features.CodingSequences[0].Translation.Trim('"'),
        metadata.Features.CodingSequences[0].GetTranslation().ToString());
    Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);
}
/// <summary>
/// Parses NC_001284.gbk and checks that a clone of its GenBank metadata passes exactly
/// the same feature assertions as the metadata it was cloned from.
/// </summary>
public void GenBankFeatureClone()
{
    ISequenceParser parser = new GenBankParser();
    ISequence seq = parser.ParseOne(@"testdata\GenBank\NC_001284.gbk");

    GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;
    // Fail with a readable message instead of a NullReferenceException further down.
    Assert.IsNotNull(metadata, "The \"GenBank\" metadata entry is not a GenBankMetadata instance.");

    // One assertion list shared by the original and the clone, so the two checks cannot
    // drift apart. Constants are passed first because Assert.AreEqual takes
    // (expected, actual).
    System.Action<GenBankMetadata> verifyFeatures = delegate(GenBankMetadata candidate)
    {
        Assert.AreEqual(743, candidate.Features.All.Count);
        Assert.AreEqual(117, candidate.Features.CodingSequences.Count);
        Assert.AreEqual(32, candidate.Features.Exons.Count);
        Assert.AreEqual(22, candidate.Features.Introns.Count);
        Assert.AreEqual(60, candidate.Features.Genes.Count);
        Assert.AreEqual(455, candidate.Features.MiscFeatures.Count);
        Assert.AreEqual(17, candidate.Features.Promoters.Count);
        Assert.AreEqual(21, candidate.Features.TransferRNAs.Count);
        Assert.AreEqual(117, candidate.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);

        // The translation qualifier is compared after trimming its surrounding quotes.
        Assert.AreEqual(
            candidate.Features.CodingSequences[0].Translation.Trim('"'),
            candidate.Features.CodingSequences[0].GetTranslation().ToString());
        Assert.AreEqual(2, candidate.GetFeatures(11918, 12241).Count);
    };

    verifyFeatures(metadata);

    GenBankMetadata clonedMetadata = metadata.Clone();
    verifyFeatures(clonedMetadata);
}
/// <summary>
/// Checks that both ParseOne and Parse throw when the reader they are given
/// contains an empty string.
/// </summary>
public void TestGenBankFailureWhenParsingEmpty()
{
    ISequenceParser parser = new GenBankParser();

    // test ParseOne
    bool parseOneThrew = false;
    using (StringReader emptyReader = new StringReader(string.Empty))
    {
        try
        {
            parser.ParseOne(emptyReader);
        }
        catch (Exception)
        {
            // all is well with the world
            parseOneThrew = true;
        }
    }

    // Assert.Fail is called outside the try block: it signals failure by throwing,
    // and the catch (Exception) above would otherwise swallow it.
    if (!parseOneThrew)
    {
        Assert.Fail("Failed to throw exception for calling ParseOne on reader containing empty string.");
    }

    // test Parse
    bool parseThrew = false;
    using (StringReader emptyReader = new StringReader(string.Empty))
    {
        try
        {
            parser.Parse(emptyReader);
        }
        catch (Exception)
        {
            // all is well with the world
            parseThrew = true;
        }
    }

    if (!parseThrew)
    {
        Assert.Fail("Failed to throw exception for calling Parse on reader containing empty string.");
    }
}
/// <summary>
/// Requests the "gbwithparts" record for the given id and parses the response
/// with a <see cref="GenBankParser"/>.
/// </summary>
/// <param name="id">
/// Accession id of the sequence in ncbi (remote id).
/// </param>
/// <returns>
/// The parsed <see cref="ISequence"/>.
/// </returns>
public static ISequence DownloadGenBankSequence(string id)
{
    ISequenceParser parser = new GenBankParser();
    string url = GetEfetchParamsString("gbwithparts") + id;

    // Dispose the response stream once parsing is done; it was previously left open.
    // NOTE(review): assumes ParseOne reads the whole record before returning and the
    // returned sequence does not read from the stream lazily - confirm.
    using (Stream dataStream = GetResponseStream(url))
    {
        return parser.ParseOne(dataStream);
    }
}
/// <summary>
/// ParseOne on the file referenced by _multipleSeqGenBankFilename must return a sequence.
/// </summary>
public void TestGenBankWhenParsingOneOfMany()
{
    ISequenceParser genBankParser = new GenBankParser();

    // Only a single sequence is requested from the file.
    ISequence firstSequence = genBankParser.ParseOne(_multipleSeqGenBankFilename);

    Assert.IsNotNull(firstSequence);
}
/// <summary>
/// ParseOne on the locus-token test file must return a sequence.
/// </summary>
public void TestGenBankLocusTokenParser()
{
    ISequenceParser genBankParser = new GenBankParser();

    ISequence parsedSequence = genBankParser.ParseOne(_genBankFile_LocusTokenParserTest);

    Assert.IsNotNull(parsedSequence);
}
/// <summary>
/// Round-trips the GenBank metadata of NC_001284.gbk through a BinaryFormatter
/// (via the file "GenbankMetadata.data") and checks that the deserialized copy
/// passes the same feature assertions as the original.
/// </summary>
public void TestGenBankFeaturesWithBinaryFormatter()
{
    // No catch-all around the body: the previous "catch { Assert.Fail(); }" replaced
    // every assertion message and exception with an anonymous failure. Letting them
    // propagate still fails the test but keeps the diagnostics. The using block
    // closes the file on every path.
    using (Stream stream = File.Open("GenbankMetadata.data", FileMode.Create))
    {
        // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted input.
        // Here it only reads back data this test wrote itself.
        BinaryFormatter formatter = new BinaryFormatter();

        ISequenceParser parser = new GenBankParser();
        ISequence seq = parser.ParseOne(@"testdata\GenBank\NC_001284.gbk");
        GenBankMetadata metadata = seq.Metadata["GenBank"] as GenBankMetadata;
        Assert.IsNotNull(metadata, "The \"GenBank\" metadata entry is not a GenBankMetadata instance.");

        // Assert.AreEqual takes (expected, actual); constants go first.
        Assert.AreEqual(743, metadata.Features.All.Count);
        Assert.AreEqual(117, metadata.Features.CodingSequences.Count);
        Assert.AreEqual(32, metadata.Features.Exons.Count);
        Assert.AreEqual(22, metadata.Features.Introns.Count);
        Assert.AreEqual(60, metadata.Features.Genes.Count);
        Assert.AreEqual(455, metadata.Features.MiscFeatures.Count);
        Assert.AreEqual(17, metadata.Features.Promoters.Count);
        Assert.AreEqual(21, metadata.Features.TransferRNAs.Count);
        Assert.AreEqual(117, metadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);
        Assert.AreEqual(
            metadata.Features.CodingSequences[0].Translation.Trim('"'),
            metadata.Features.CodingSequences[0].GetTranslation().ToString());
        Assert.AreEqual(2, metadata.GetFeatures(11918, 12241).Count);

        // Write, rewind, read back.
        formatter.Serialize(stream, metadata);
        stream.Seek(0, SeekOrigin.Begin);
        GenBankMetadata deserializedMetadata = (GenBankMetadata)formatter.Deserialize(stream);

        Assert.AreNotSame(metadata, deserializedMetadata);
        Assert.AreEqual(743, deserializedMetadata.Features.All.Count);
        Assert.AreEqual(117, deserializedMetadata.Features.CodingSequences.Count);
        Assert.AreEqual(32, deserializedMetadata.Features.Exons.Count);
        Assert.AreEqual(22, deserializedMetadata.Features.Introns.Count);
        Assert.AreEqual(60, deserializedMetadata.Features.Genes.Count);
        Assert.AreEqual(455, deserializedMetadata.Features.MiscFeatures.Count);
        Assert.AreEqual(17, deserializedMetadata.Features.Promoters.Count);
        Assert.AreEqual(21, deserializedMetadata.Features.TransferRNAs.Count);
        Assert.AreEqual(117, deserializedMetadata.Features.All.FindAll(F => F.Key.Equals(StandardFeatureKeys.CodingSequence)).Count);

        // As before, the deserialized translation qualifier is checked against the
        // translation computed from the ORIGINAL metadata.
        Assert.AreEqual(
            deserializedMetadata.Features.CodingSequences[0].Translation.Trim('"'),
            metadata.Features.CodingSequences[0].GetTranslation().ToString());
        Assert.AreEqual(2, deserializedMetadata.GetFeatures(11918, 12241).Count);
    }
}
/// <summary>
/// Parses the single-protein GenBank test file and checks the sequence properties,
/// selected metadata, the sequence text, and that formatting the parsed sequence
/// reproduces the expected output (compared with all spaces removed).
/// </summary>
public void TestGenBankWhenParsingOne()
{
    // parse
    ISequenceParser parser = new GenBankParser();
    ISequence seq = parser.ParseOne(_singleProteinSeqGenBankFilename);

    // test the non-metadata properties
    Assert.IsTrue(seq.IsReadOnly);
    Assert.AreEqual(Alphabets.DNA, seq.Alphabet);
    Assert.AreEqual(MoleculeType.DNA, seq.MoleculeType);
    Assert.AreEqual("SCU49845", seq.DisplayID);
    Assert.AreEqual("SCU49845", seq.ID);

    // test the metadata that is tricky to parse, and will not be tested implicitly by
    // testing the formatting
    GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
    Assert.AreEqual(SequenceStrandType.None, metadata.Locus.Strand);
    Assert.AreEqual("none", metadata.Locus.StrandTopology.ToString().ToLowerInvariant());
    Assert.AreEqual("PLN", metadata.Locus.DivisionCode.ToString());

    // The expected date is parsed with the invariant culture so that the English
    // month abbreviation is understood whatever the culture of the test machine.
    Assert.AreEqual(
        DateTime.Parse("21-JUN-1999", System.Globalization.CultureInfo.InvariantCulture),
        metadata.Locus.Date);
    Assert.AreEqual("1", metadata.Version.Version);
    Assert.AreEqual("1293613", metadata.Version.GINumber);

    // test that we're correctly putting all types of metadata in the right places
    Assert.AreEqual(1, seq.Metadata.Count);
    IList<CitationReference> referenceList = metadata.References;
    Assert.AreEqual(3, referenceList.Count);
    IList<FeatureItem> featureList = metadata.Features.All;
    Assert.AreEqual(6, featureList.Count);
    Assert.AreEqual(4, featureList[0].Qualifiers.Count);
    Assert.AreEqual(5, featureList[1].Qualifiers.Count);
    Assert.AreEqual(1, featureList[2].Qualifiers.Count);

    // test the sequence string
    string expected =
@"GATCCTCCATATACAACGGTATCTCCACCTCAGGTTTAGATCTCAACAACGGAACCATTGCCGACATGAGACAGTTAGGTATCGTCGAGAGTTACAAGCTAAAACGAGCAGTAGTCAGCTCTGCATCTGAAGCCGCTGAAGTTCTACTAAGGGTGGATAACATCATCCGTGCAAGACCAAGAACCGCCAATAGACAACATATGTAACATATTTAGGATATACCTCGAAAATAATAAACCGCCACACTGTCATTATTATAATTAGAAACAGAACGCAAAAATTATCCACTATATAATTCAAAGACGCGAAAAAAAAAGAACAACGCGTCATAGAACTTTTGGCAATTCGCGTCACAAATAAATTTTGGCAACTTATGTTTCCTCTTCGAGCAGTACTCGAGCCCTGTCTCAAGAATGTAATAATACCCATCGTAGGTATGGTTAAAGATAGCATCTCCACAACCTCAAAGCTCCTTGCCGAGAGTCGCCCTCCTTTGTCGAGTAATTTTCACTTTTCATATGAGAACTTATTTTCTTATTCTTTACTCTCACATCCTGTAGTGATTGACACTGCAACAGCCACCATCACTAGAAGAACAGAACAATTACTTAATAGAAAAATTATATCTTCCTCGAAACGATTTCCTGCTTCCAACATCTACGTATATCAAGAAGCATTCACTTACCATGACACAGCTTCAGATTTCATTATTGCTGACAGCTACTATATCACTACTCCATCTAGTAGTGGCCACGCCCTATGAGGCATATCCTATCGGAAAACAATACCCCCCAGTGGCAAGAGTCAATGAATCGTTTACATTTCAAATTTCCAATGATACCTATAAATCGTCTGTAGACAAGACAGCTCAAATAACATACAATTGCTTCGACTTACCGAGCTGGCTTTCGTTTGACTCTAGTTCTAGAACGTTCTCAGGTGAACCTTCTTCTGACTTACTATCTGATGCGAACACCACGTTGTATTTCAATGTAATACTCGAGGGTACGGACTCTGCCGACAGCACGTCTTTGAACAATACATACCAATTTGTTGTTACAAACCGTCCATCCATCTCGCTATCGTCAGATTTCAATCTATTGGCGTTGTTAAAAAACTATGGTTATACTAACGGCAAAAACGCTCTGAAACTAGATCCTAATGAAGTCTTCAACGTGACTTTTGACCGTTCAATGTTCACTAACGAAGAATCCATTGTGTCGTATTACGGACGTTCTCAGTTGTATAATGCGCCGTTACCCAATTGGCTGTTCTTCGATTCTGGCGAGTTGAAGTTTACTGGGACGGCACCGGTGATAAACTCGGCGATTGCTCCAGAAACAAGCTACAGTTTTGTCATCATCGCTACAGACATTGAAGGATTTTCTGCCGTTGAGGTAGAATTCGAATTAGTCATCGGGGCTCACCAGTTAACTACCTCTATTCAAAATAGTTTGATAATCAACGTTACTGACACAGGTAACGTTTCATATGACTTACCTCTAAACTATGTTTATCTCGATGACGATCCTATTTCTTCTGATAAATTGGGTTCTATAAACTTATTGGATGCTCCAGACTGGGTGGCATTAGATAATGCTACCATTTCCGGGTCTGTCCCAGATGAATTACTCGGTAAGAACTCCAATCCTGCCAATTTTTCTGTGTCCATTTATGATACTTATGGTGATGTGATTTATTTCAACTTCGAAGTTGTCTCCACAACGGATTTGTTTGCCATTAGTTCTCTTCCCAATATTAACGCTACAAGGGGTGAATGGTTCTCCTACTATTTTTTGCCTTCTCAGTTTACAGACTACGTGAATACAAACGTTTCATTAGAGTTTACTAATTCAAGCCAAGACCATGACTGGGTGAAATTCCAATCATCTAATTTAACATTAGCTGGAGAAGTGCCCAAGAATTTCGACAAGCTTTCATTAGGTTTGAAAGCGAACCAAGGTTCACAATCTCAAGAGCTATATTTTAACATCATTG
GCATGGATTCAAAGATAACTCACTCAAACCACAGTGCGAATGCAACGTCCACAAGAAGTTCTCACCACTCCACCTCAACAAGTTCTTACACATCTTCTACTTACACTGCAAAAATTTCTTCTACCTCCGCTGCTGCTACTTCTTCTGCTCCAGCAGCGCTGCCAGCAGCCAATAAAACTTCATCTCACAATAAAAAAGCAGTAGCAATTGCGTGCGGTGTTGCTATCCCATTAGGCGTTATCCTAGTAGCTCTCATTTGCTTCCTAATATTCTGGAGACGCAGAAGGGAAAATCCAGACGATGAAAACTTACCGCATGCTATTAGTGGACCTGATTTGAATAATCCTGCAAATAAACCAAATCAAGAAAACGCTACACCTTTGAACAACCCCTTTGATGATGATGCTTCCTCGTACGATGATACTTCAATAGCAAGAAGATTGGCTGCTTTGAACACTTTGAAATTGGATAACCACTCTGCCACTGAATCTGATATTTCCAGCGTGGATGAAAAGAGAGATTCTCTATCAGGTATGAATACATACAATGATCAGTTCCAATCCCAAAGTAAAGAAGAATTATTAGCAAAACCCCCAGTACAGCCTCCAGAGAGCCCGTTCTTTGACCCACAGAATAGGTCTTCTTCTGTGTATATGGATAGTGAACCAGCAGTAAATAAATCCTGGCGATATACTGGCAACCTGTCACCAGTCTCTGATATTGTCAGAGACAGTTACGGATCACAAAAAACTGTTGATACAGAAAAACTTTTCGATTTAGAAGCACCAGAGAAGGAAAAACGTACGTCAAGGGATGTCACTATGTCTTCACTGGACCCTTGGAACAGCAATATTAGCCCTTCTCCCGTAAGAAAATCAGTAACACCATCACCATATAACGTAACGAAGCATCGTAACCGCCACTTACAAAATATTCAAGACTCTCAAAGCGGTAAAAACGGAATCACTCCCACAACAATGTCAACTTCATCTTCTGACGATTTTGTTCCGGTTAAAGATGGTGAAAATTTTTGCTGGGTCCATAGCATGGAACCAGACAGAAGACCAAGTAAGAAAAGGTTAGTAGATTTTTCAAATAAGAGTAATGTCAATGTTGGTCAAGTTAAGGACATTCACGGACGCATCCCAGAAATGCTGTGATTATACGCAACGATATTTTGCTTAATTTTATTTTCCTGTTTTATTTTTTATTAGTGGTTTACAGATACCCTATATTTTATTTAGTTTTTATACTTAGAGACATTTAATTTTAATTCCATTCTTCAAATTTCATTTTTGCACTTAAAACAAAGATCCAAAAATGCTCTCGCCCTCTTCATATTGAGAATACACTCCATTCAAAATTTTGTCGTCACCGCTGATTAATTTTTCACTAAACTGATGAATAATCAAAGGCCCCACGTCAGAACCGACTAAAGAAGTGAGTTTTATTTTAGGAGGTTGAAAACCATTATTGTCTGGTAAATTTTCATCTTCTTGACATTTAACCCAGTTTGAATCCCTTTCAATTTCTGCTTTTTCCTCCAAACTATCGACCCTCCTGTTTCTGTCCAACTTATGTCCTAGTTCCAATTCGATCGCATTAATAACTGCTTCAAATGTTATTGTGTCATCGTTGACTTTAGGTAATTTCTCCAAATGCATAATCAAACTATTTAAGGAAGATCGGAATTCGTCGAACACTTCAGTTTCCGTAATGATCTGATCGTCTTTATCCACATGTTGTAATTCACTAAAATCTAAAACGTATTTTTCAATGCATAAATCGTTCTTTTTATTAATAATGCAGATGGAAAATCTGTAAACGTGCGTTAATTTAGAAAGAACATCCAGTATAAGTTCTTCTATATAGTCAATTAAAGCAGGATGCCTATTAATGGGAACGAACTGCGGCAAGTTGAATGACTGGTAAGTAGTGTAGTCGAATGACTGAGGTGGGTATACATTTCTATAAAATAAAATCAAATTAATGTAGCATTT
TAAGTATACCCTCAGCCACTTCTCTACCCATCTATTCATAAAGCTGACGCAACGATTACTATTTTTTTTTTCTTCTTGGATCTCAGTCGTCGCAAAAACGTATACCTTCTTTTTCCGACCTTTTTTTTAGCTTTCTGGAAAAGTTTATATTAGTTAAACAGGGTCTAGTCTTAGTGTGAAAGCTAGTGGTTTCGATTGACTGATATTAAGAAAGTGGAAATTAAATTAGTAGTGTAGACGTATATGCATATGTATTTCTCGCCTGTTTATGTTTCTACGTACTTTTGATTTATAGCAAGGGGAAAAGAAATACATACTATTTTTTGGTAAAGGTGAAAGCATAATGTAAAAGCTAGAATAAAATGGACGAAATAAAGAGAGGCTTAGTTCATCTTTTTTCCAAAAAGCACCCAATGATAATAACTAAAATGAAAAGGATTTGCCATCTGTCAGCAACATCAGTTGTGTGAGCAATAATAAAATCATCACCTCCGTTGCCTTTAGCGCGTTTGTCGTTTGTATCTTCCGTAATTTTAGTCTTATCAATGGGAATCATAAATTTTCCAATGAATTAGCAATTTCGTCCAATTCTTTTTGAGCTTCTTCATATTTGCTTTGGAATTCTTCGCACTTCTTTTCCCATTCATCTCTTTCTTCTTCCAAAGCAACGATCCTTCTACCCATTTGCTCAGAGTTCAAATCGGCCTCTTTCAGTTTATCCATTGCTTCCTTCAGTTTGGCTTCACTGTCTTCTAGCTGTTGTTCTAGATCCTGGTTTTTCTTGGTGTAGTTCTCATTATTAGATCTCAAGTTATTGGAGTCTTCAGCCAATTGCTTTGTATCAGACAATTGACTCTCTAACTTCTCCACTTCACTGTCGAGTTGCTCGTTTTTAGCGGACAAAGATTTAATCTCGTTTTCTTTTTCAGTGTTAGATTGCTCTAATTCTTTGAGCTGTTCTCTCAGCTCCTCATATTTTTCTTGCCATGACTCAGATTCTAATTTTAAGCTATTCAATTTCTCTTTGATC";
    Assert.AreEqual(expected, seq.ToString());

    // format
    ISequenceFormatter formatter = new GenBankFormatter();
    string actual = formatter.FormatString(seq);

    // test the formatting
    Assert.AreEqual(_singleProteinSeqGenBankFileExpectedOutput.Replace(" ", ""), actual.Replace(" ", ""));
}
/// <summary>
/// With the parser's alphabet set to DNA up front, the single-DNA test file must parse
/// to a DNA sequence and format back to the expected text (compared with all spaces
/// removed).
/// </summary>
public void TestGenBankWhenUserSetsDnaAlphabet()
{
    // Configure the parser with the matching alphabet before parsing.
    ISequenceParser dnaParser = new GenBankParser();
    dnaParser.Alphabet = Alphabets.DNA;

    ISequence parsed = dnaParser.ParseOne(_singleDnaSeqGenBankFilename);
    Assert.AreEqual(Alphabets.DNA, parsed.Alphabet);

    // Format the sequence again and compare ignoring spaces.
    ISequenceFormatter genBankFormatter = new GenBankFormatter();
    string formatted = genBankFormatter.FormatString(parsed);

    string expectedNoSpaces = _singleDnaSeqGenBankFileExpectedOutput.Replace(" ", "");
    string formattedNoSpaces = formatted.Replace(" ", "");
    Assert.AreEqual(expectedNoSpaces, formattedNoSpaces);
}
/// <summary>
/// Configures a GenBankParser invalidly for the given xml node, parses the node's
/// file and fails the test unless an exception is thrown.
/// </summary>
/// <param name="node">
/// Xml node name. It selects the file path and which invalid setting is applied:
/// a null LocationBuilder for Constants.SimpleGenBankNodeName, the RNA alphabet for
/// Constants.SimpleGenBankPrimaryNode, none otherwise.
/// </param>
void InvalidateGenBankParser(string node)
{
    // Initialization of xml strings.
    FilePath = _utilityObj._xmlUtil.GetTextValue(node, Constants.FilePathNode);

    bool exceptionThrown = false;
    try
    {
        GenBankParser parserObj = new GenBankParser();
        if (string.Equals(Constants.SimpleGenBankNodeName, node))
        {
            parserObj.LocationBuilder = null;
        }
        else if (string.Equals(Constants.SimpleGenBankPrimaryNode, node))
        {
            parserObj.Alphabet = Alphabets.RNA;
        }

        parserObj.ParseOne(FilePath);
    }
    catch (Exception)
    {
        // Any exception type counts as success, exactly as before: the three former
        // handlers (InvalidOperationException, InvalidDataException, Exception) did
        // the same thing, so the catch-all alone is equivalent.
        exceptionThrown = true;
        ApplicationLog.WriteLine(
            "GenBank Parser : Successfully validated the exception:");
        Console.WriteLine(
            "GenBank Parser : Successfully validated the exception:");
    }

    // The failure is raised outside the try block. Assert.Fail signals failure by
    // throwing, so when it was inside the try the catch (Exception) above swallowed
    // it and this check could never fail.
    if (!exceptionThrown)
    {
        Assert.Fail("GenBank Parser : no exception was thrown for the invalid parser configuration.");
    }
}
/// <summary>
/// Parsing the single-DNA test file with the parser's alphabet set to Protein
/// must throw.
/// </summary>
public void TestGenBankWhenUserSetsIncorrectAlphabet()
{
    ISequenceParser proteinParser = new GenBankParser();
    proteinParser.Alphabet = Alphabets.Protein;

    bool parseRejected;
    try
    {
        proteinParser.ParseOne(_singleDnaSeqGenBankFilename);
        parseRejected = false;
    }
    catch (Exception)
    {
        // all is well with the world
        parseRejected = true;
    }

    // Reported outside the try so the catch above cannot swallow the failure.
    if (!parseRejected)
    {
        Assert.Fail("Failed to throw exception for trying to create sequence using incorrect alphabet.");
    }
}
/// <summary>
/// Walks the BLAST report of one query in report order and returns the position of
/// the first HSP that passes the user-defined thresholds and whose GenBank record
/// yields a non-null best feature item.
/// </summary>
/// <param name="Up">User parameters; supplies the project directory and the BLAST thresholds.</param>
/// <param name="seqPos">Number of the query's report file, "xml\[seqPos].xml" under the project directory.</param>
/// <returns>
/// A two-element array { hit index, HSP index }, or { -1, -1 } when no such HSP exists.
/// </returns>
/// <exception cref="FileNotFoundException">The BLAST report file does not exist.</exception>
public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    int[] annotatedIndex = new int[] { -1, -1 };

    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        // More specific than a bare Exception and carries the path; callers that
        // catch Exception still catch it, and the message is unchanged.
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();

    // iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            // Counts the HSPs that passed the thresholds without producing a result.
            int hitsProcessed = 0;

            // For each hit (the loop simply does not run when there are none)
            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];

                // For each HSP
                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    bool passesThresholds =
                        (percentId >= Up.BlastMinPercentIdentity) &&
                        (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                        (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                        (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    // The GenBank record is looked up under
                    // "gb\[gi]_[hit start]_[hit end].gb"; the GI number is the second
                    // '|'-separated field of the hit id.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);
                        if (bestItem != null)
                        {
                            annotatedIndex[0] = i;
                            annotatedIndex[1] = j;
                            return annotatedIndex;
                        }
                    }
                    catch
                    {
                        // Deliberately best-effort: a record that cannot be read or
                        // parsed is logged and the search moves on to the next HSP.
                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                    }

                    hitsProcessed += 1;
                }
            }
        }
    }

    return annotatedIndex;
}
/// <summary>
/// Exercises FeatureItem.GetSubSequence (on parsed features and on a hand-built
/// join/complement location with a referenced sequence) and FeatureItem.GetSubFeatures
/// (on a hand-built set of six features).
/// </summary>
public void TestFeatureItem()
{
    const string genBankFolder = @"TestUtils\GenBank";

    GenBankParser genBankParser = new GenBankParser();
    ISequence parsedSeq = genBankParser.ParseOne(genBankFolder + @"\BK000016-tpa.gbk");
    GenBankMetadata metadata = parsedSeq.Metadata["GenBank"] as GenBankMetadata;

    // ---- GetSubSequence ----

    // For the first two parsed features the extracted sub sequence must equal the
    // matching Range of the parsed sequence.
    for (int featureIndex = 0; featureIndex < 2; featureIndex++)
    {
        ISequence extracted = metadata.Features.All[featureIndex].GetSubSequence(parsedSeq);
        int rangeStart = metadata.Features.All[featureIndex].Location.Start - 1;
        int rangeExtent = metadata.Features.All[featureIndex].Location.End - rangeStart;
        Assert.AreEqual(extracted.ToString(), parsedSeq.Range(rangeStart, rangeExtent).ToString());
    }

    // A location that joins a complemented local span with a span of sequence "Ref1".
    const string locationText = "join(complement(4..8),Ref1:5..7)";
    ISequence localSeq = new Sequence(Alphabets.DNA, "ACGTAAAGGT");
    Sequence referencedSeq = new Sequence(Alphabets.DNA, "AAAAATTTT");

    LocationBuilder locationBuilder = new LocationBuilder();
    ILocation joinedLocation = locationBuilder.GetLocation(locationText);
    Assert.AreEqual(locationText, locationBuilder.GetLocationString(joinedLocation));

    FeatureItem joinedFeature = new FeatureItem("Feature1", joinedLocation);
    Dictionary<string, ISequence> referencedSeqs = new Dictionary<string, ISequence>
    {
        { "Ref1", referencedSeq }
    };
    ISequence joinedResult = joinedFeature.GetSubSequence(localSeq, referencedSeqs);
    Assert.AreEqual("ATTTCATT", joinedResult.ToString());

    // ---- GetSubFeatures ----

    SequenceFeatures seqFeatures = new SequenceFeatures();
    FeatureItem source = new FeatureItem("Source", "1..1509");
    FeatureItem mRNA = new FeatureItem("mRNA", "join(10..567,789..1320)");
    FeatureItem cds = new FeatureItem("CDS", "join(54..567,789..1254)");
    FeatureItem exon1 = new FeatureItem("Exon", "10..567");
    FeatureItem intron = new FeatureItem("Intron", "568..788");
    FeatureItem exon2 = new FeatureItem("Exon", "789..1320");

    FeatureItem[] orderedFeatures = { source, mRNA, cds, exon1, intron, exon2 };
    foreach (FeatureItem feature in orderedFeatures)
    {
        seqFeatures.All.Add(feature);
    }

    // Expected number of sub features, in the same order as orderedFeatures.
    int[] expectedSubFeatureCounts = { 5, 4, 1, 0, 0, 0 };
    for (int k = 0; k < orderedFeatures.Length; k++)
    {
        List<FeatureItem> subFeatures = orderedFeatures[k].GetSubFeatures(seqFeatures);
        Assert.AreEqual(expectedSubFeatureCounts[k], subFeatures.Count);
    }
}
/// <summary>
/// Creates one item for every BLAST HSP of a query that passes the user-defined
/// thresholds, or a single default item for a search record in which no HSP passes,
/// and adds the items to <paramref name="collection"/>.
/// </summary>
/// <param name="Up">User parameters: project directory, BLAST thresholds, BLAST program and facet categories.</param>
/// <param name="rec">The query sequence; its DisplayID is parsed into item name and description.</param>
/// <param name="itemId">Id given to the first item created; incremented for each further item.</param>
/// <param name="seqPos">Number of the query's report file, "xml\[seqPos].xml" under the project directory.</param>
/// <param name="collection">Collection that receives the created items.</param>
/// <returns>The item id following the last one used.</returns>
/// <exception cref="FileNotFoundException">The BLAST report file does not exist.</exception>
public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        // More specific than a bare Exception and carries the path; callers that
        // catch Exception still catch it, and the message is unchanged.
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();
    int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

    // iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;
            int rank = 0;

            // For each hit (the loop simply does not run when there are none)
            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];

                // For each HSP
                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // if HSP passes user-defined thresholds
                    bool passesThresholds =
                        (percentId >= Up.BlastMinPercentIdentity) &&
                        (Up.BlastMaxEvalue >= blastHsp.EValue) &&
                        (queryCoverage >= Up.BlastMinPercentQueryCoverage) &&
                        (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    rank += 1;

                    // Compare this HSP's score with the first HSP of the next hit.
                    string nextScore;
                    if ((i + 1) < record.Hits.Count())
                    {
                        nextScore = (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score) ? "less than" : "equal";
                    }
                    else
                    {
                        nextScore = "non existent";
                    }

                    // parse GI number from hit
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    // init item
                    string img = "#" + itemId.ToString();
                    Item item = new Item(itemId, img);
                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                    item.Name = headerTokens[0];
                    item.Description = headerTokens[1];

                    // write pairwise alignment
                    writePairwiseAlignment(Up, blastHit, j, itemId);

                    // Facets are staged in a local list and copied to the item only
                    // once a complete set exists. Previously an exception thrown part
                    // way through the loop left the facets already added on the item,
                    // and the fallback below then added every category again.
                    List<Facet> stagedFacets = new List<Facet>();

                    // try to parse the GB record associated with the hit and set facet values to data from BLAST/GB record
                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;

                        // May be null; the "Annotated" facet below handles that case.
                        // The former "best CDS" loop was removed: its result was never
                        // used, and it dereferenced bestItem without a null check,
                        // which threw for records with no best item and several
                        // coding sequences and sent them down the error path.
                        FeatureItem bestItem = getBestFeatureItem(features);

                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, nextScore);
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, GetAnnotatedFacetValue(annotatedIndex, i, j, bestItem != null));
                                    break;
                                default:
                                    facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                    break;
                            }

                            stagedFacets.Add(facet);
                        }
                    }
                    catch
                    {
                        // Deliberately best-effort: if reading the GB record or
                        // building a facet failed, init the item with default values
                        // (similar to the 'no hit' item below).
                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        item.Href = "#";
                        stagedFacets.Clear();

                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, "no");
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, GetAnnotatedFacetValue(annotatedIndex, i, j, false));
                                    break;
                                default:
                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                    break;
                            }

                            stagedFacets.Add(facet);
                        }
                    }

                    foreach (Facet staged in stagedFacets)
                    {
                        item.Facets.Add(staged);
                    }

                    // Add item to collection, increment to next item
                    collection.Items.Add(item);
                    hitsProcessed += 1;
                    itemId += 1;
                }
            }

            // No hits at all, or none that passed the thresholds: emit one default item.
            if (hitsProcessed == 0)
            {
                // Init Pivot item
                string img = "#" + itemId.ToString();
                Item item = new Item(itemId, img);
                item.Href = "#";
                string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                item.Name = headerTokens[0];
                item.Description = headerTokens[1];

                // Write pairwise alignment to file.
                writePairwiseAlignment(Up, itemId);

                // Set facet values for each facet category to default values
                foreach (FacetCategory f in Up.FacetCategories)
                {
                    Facet facet;
                    switch (f.Name)
                    {
                        case "InputOrder":
                            facet = new Facet(f.Name, f.Type, seqPos);
                            break;
                        case "QuerySequence":
                            facet = new Facet(f.Name, f.Type, rec.ToString());
                            break;
                        default:
                            facet = CreateFacet(f.Name, f.Type, record, item, 0);
                            break;
                    }

                    item.Facets.Add(facet);
                }

                // Add item to collection, increment to next item
                collection.Items.Add(item);
                itemId += 1;
            }
        }
    }

    return itemId;
}

/// <summary>
/// Computes the value of the "Annotated" facet for the HSP at (hitIndex, hspIndex).
/// </summary>
/// <param name="annotatedIndex">Result of GetBestAnnotatedIndex: { hit index, HSP index } or { -1, -1 }.</param>
/// <param name="hitIndex">Index of the hit being described.</param>
/// <param name="hspIndex">Index of the HSP within that hit.</param>
/// <param name="hasBestFeature">Whether a best feature item was found for this HSP's GenBank record.</param>
/// <returns>"top_annotated", "top_unannotated", "annotated" or "unannotated".</returns>
private static string GetAnnotatedFacetValue(int[] annotatedIndex, int hitIndex, int hspIndex, bool hasBestFeature)
{
    if ((annotatedIndex[0] == hitIndex) && (annotatedIndex[1] == hspIndex))
    {
        return "top_annotated";
    }

    // The very first HSP is the "top" one when no annotated HSP exists at all.
    if ((hitIndex == 0) && (hspIndex == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
    {
        return "top_unannotated";
    }

    return hasBestFeature ? "annotated" : "unannotated";
}
/// <summary>
/// Creates the collection items for one query sequence from its saved BLAST XML report.
/// One item is created for every HSP that passes the user-defined thresholds; if no HSP of a
/// search record passes (or the record has no hits), a single default item is created instead.
/// </summary>
/// <param name="Up">User parameters: project directory, BLAST thresholds, BLAST program and facet categories.</param>
/// <param name="rec">Query sequence; its DisplayID supplies the item name/description and its string form the "QuerySequence" facet.</param>
/// <param name="itemId">Id given to the first item created; incremented once per created item.</param>
/// <param name="seqPos">Number of the query; selects the report file "&lt;ProjectDir&gt;\xml\&lt;seqPos&gt;.xml" and is the "InputOrder" facet value.</param>
/// <param name="collection">Collection that receives the created items.</param>
/// <returns>The next unused item id (the incoming <paramref name="itemId"/> plus the number of items added).</returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();
    int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

    // Iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;
            int rank = 0;

            // For each hit
            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];

                // For each HSP
                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // Skip HSPs that do not pass the user-defined thresholds.
                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity)
                        && (Up.BlastMaxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= Up.BlastMinPercentQueryCoverage)
                        && (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    rank += 1;

                    // Compare this HSP's score with the first HSP of the following hit, if there is one.
                    string nextScore;
                    if ((i + 1) < record.Hits.Count())
                    {
                        nextScore = (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score) ? "less than" : "equal";
                    }
                    else
                    {
                        nextScore = "non existent";
                    }

                    // Parse the GI number from the hit id and derive the GenBank file name from it.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString()
                        + "_" + gitem.HitStart.ToString()
                        + "_" + gitem.HitEnd.ToString()
                        + ".gb";

                    // Init item
                    string img = "#" + itemId.ToString();
                    Item item = new Item(itemId, img);
                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                    item.Name = headerTokens[0];
                    item.Description = headerTokens[1];

                    // Write pairwise alignment
                    writePairwiseAlignment(Up, blastHit, j, itemId);

                    // Try to parse the GB record associated with the hit and set facet values
                    // from the BLAST/GB data.
                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];

                        // bestItem may be null; the "Annotated" facet below accounts for that.
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);

                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, nextScore);
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, GetAnnotatedFacetValue(annotatedIndex, i, j, bestItem != null));
                                    break;
                                default:
                                    facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                    break;
                            }

                            item.Facets.Add(facet);
                        }
                    }
                    catch (Exception)
                    {
                        // If parsing failed, init the item with default values (similar to 'no hit' below).
                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());

                        // Start from a clean item: the try block may have failed after adding some
                        // facets, and re-adding a full set to the same item would duplicate them.
                        item = new Item(itemId, img);
                        item.Name = headerTokens[0];
                        item.Description = headerTokens[1];
                        item.Href = "#";

                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, "no");
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, GetAnnotatedFacetValue(annotatedIndex, i, j, false));
                                    break;
                                default:
                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                    break;
                            }

                            item.Facets.Add(facet);
                        }
                    }

                    // Add item to collection, increment to next item.
                    collection.Items.Add(item);
                    hitsProcessed += 1;
                    itemId += 1;
                }
            }

            // No hits at all, or no HSP passed the thresholds: emit one default item for the record.
            if (hitsProcessed == 0)
            {
                // Init Pivot item
                string img = "#" + itemId.ToString();
                Item item = new Item(itemId, img);
                item.Href = "#";
                string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                item.Name = headerTokens[0];
                item.Description = headerTokens[1];

                // Write pairwise alignment to file.
                writePairwiseAlignment(Up, itemId);

                // Set facet values for each facet category to default values.
                foreach (FacetCategory f in Up.FacetCategories)
                {
                    Facet facet;
                    switch (f.Name)
                    {
                        case "InputOrder":
                            facet = new Facet(f.Name, f.Type, seqPos);
                            break;
                        case "QuerySequence":
                            facet = new Facet(f.Name, f.Type, rec.ToString());
                            break;
                        default:
                            facet = CreateFacet(f.Name, f.Type, record, item, 0);
                            break;
                    }

                    item.Facets.Add(facet);
                }

                collection.Items.Add(item);
                itemId += 1;
            }
        }
    }

    return itemId;
}

/// <summary>
/// Computes the value of the "Annotated" facet for the HSP at (hitIndex, hspIndex).
/// </summary>
/// <param name="annotatedIndex">Two-element {hit index, HSP index} array from GetBestAnnotatedIndex; {-1, -1} means none was found.</param>
/// <param name="hitIndex">Index of the current hit.</param>
/// <param name="hspIndex">Index of the current HSP within the hit.</param>
/// <param name="hasBestFeature">Whether a best feature item was found for this HSP's GenBank record.</param>
/// <returns>"top_annotated", "top_unannotated", "annotated" or "unannotated".</returns>
private static string GetAnnotatedFacetValue(int[] annotatedIndex, int hitIndex, int hspIndex, bool hasBestFeature)
{
    if ((annotatedIndex[0] == hitIndex) && (annotatedIndex[1] == hspIndex))
    {
        return "top_annotated";
    }

    if ((hitIndex == 0) && (hspIndex == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
    {
        return "top_unannotated";
    }

    return hasBestFeature ? "annotated" : "unannotated";
}
/// <summary>
/// Collects the GenBank items selected by filtering every BLAST XML report under
/// "&lt;ProjectDir&gt;\xml", then makes sure each one has a parsable ".gb" file under
/// "&lt;ProjectDir&gt;\gb", downloading the record when the file is missing or cannot be parsed.
/// Progress is reported through the progress bar; download and parse failures are summarized
/// in message boxes at the end.
/// </summary>
/// <exception cref="DirectoryNotFoundException">The "xml" directory of the project does not exist.</exception>
private void DoGenBank()
{
    int progValue = 0;
    Dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Normal, new Action(delegate()
    {
        (CurrentControl as UserControl5).UserControl5Step1.Foreground = System.Windows.Media.Brushes.Black;
        progressBar1.Value = progValue;
    }));

    string inputDir = Up.ProjectDir + "\\xml";
    if (!Directory.Exists(inputDir))
    {
        throw new DirectoryNotFoundException("Directory " + inputDir + " does not exist.");
    }

    // Step 1: filter the BLAST reports and collect the GenBank items that are needed.
    string[] blastXmlFiles = Directory.GetFiles(inputDir, "*.xml");
    int c = 1;
    Stack<GenBankItem> giList = new Stack<GenBankItem>();
    foreach (string blastFile in blastXmlFiles)
    {
        BlastXmlParser blastParser = new BlastXmlParser();
        progValue = Convert.ToInt32(Math.Round((double)c / blastXmlFiles.Length * 100, 0));
        UpdateProgressBar(progValue, "Filtering results");
        try
        {
            IList<BlastResult> blastResults = blastParser.Parse(blastFile);
            List<GenBankItem> recordGiList = BlastUtil.filter(blastResults, Up.BlastMaxNumHits, Up.BlastMaxEvalue, Up.BlastMinPercentIdentity, Up.BlastMinPercentQueryCoverage);
            foreach (GenBankItem gi in recordGiList)
            {
                giList.Push(gi);
                Debug.WriteLine(gi.HitStart.ToString() + " " + gi.HitEnd.ToString());
            }
        }
        catch (Exception)
        {
            FatalErrorDialog("Cannot parse " + blastFile);
            Debug.WriteLine("Cannot parse " + blastFile);
        }

        c += 1;
    }

    progValue = 0;
    Dispatcher.Invoke(System.Windows.Threading.DispatcherPriority.Normal, new Action(delegate()
    {
        (CurrentControl as UserControl5).UserControl5Step2.Foreground = System.Windows.Media.Brushes.Black;
        progressBar1.Value = progValue;
    }));

    // Step 2: make sure every collected item has a parsable GenBank file on disk.
    int totalGi = giList.Count;
    GenBankParser genBankParser = new GenBankParser();
    List<string> unParsableGIs = new List<string>();
    List<string> notDownloadedGIs = new List<string>();

    bool isConnected = IsConnectedToInternet();
    if (!isConnected)
    {
        MessageBox.Show("Your internet connection appears to be down. As a result, missing GenBank records will not be downloaded.");
    }

    while (giList.Count > 0)
    {
        GenBankItem gitem = giList.Pop();
        progValue = Convert.ToInt32(Math.Round(((totalGi - giList.Count) / (double)totalGi) * 100, 0));
        UpdateProgressBar(progValue, "Downloading GenBank records");

        string outFilename = Up.ProjectDir + "\\gb\\" + gitem.Id
            + "_" + gitem.HitStart.ToString()
            + "_" + gitem.HitEnd.ToString()
            + ".gb";

        // Nothing to do when a parsable record is already on disk.
        if (File.Exists(outFilename) && CanParseGenBankFile(genBankParser, outFilename))
        {
            continue;
        }

        if (!isConnected)
        {
            notDownloadedGIs.Add(gitem.Id.ToString());
            continue;
        }

        // A failed download is reported as "not downloaded". It is no longer re-queued without
        // limit (which never terminated on a persistent failure) nor parsed and counted as
        // "unparsable" as well.
        if (!TryDownloadGenBankRecord(GetGenbankUrl(gitem), outFilename))
        {
            notDownloadedGIs.Add(gitem.Id.ToString());
            continue;
        }

        if (!CanParseGenBankFile(genBankParser, outFilename))
        {
            unParsableGIs.Add(gitem.Id.ToString());
        }
    }

    if (notDownloadedGIs.Count > 0)
    {
        MessageBox.Show("Error downloading GenBank records: " + string.Join(",", notDownloadedGIs.ToArray()) + ".\r\nThis is likely caused by an interruption in the internet connection. Re-attempt the download by repeating this step.\r\n");
    }

    if (unParsableGIs.Count > 0)
    {
        MessageBox.Show("Error parsing GenBank records: " + string.Join(",", unParsableGIs.ToArray()) + ".\r\nThis is likely due to an unsupported field in the GenBank record. Contact the MBF development team at http://mbf.codeplex.com, and include one of the GI numbers in the bug report.\r\nYou can copy this message to the clipboard using Ctrl-C.\r\n");
    }
}

/// <summary>
/// Reports whether <paramref name="path"/> can be parsed as a single GenBank record.
/// </summary>
/// <param name="parser">Parser used for the attempt.</param>
/// <param name="path">Path of the GenBank file.</param>
/// <returns>true if parsing completed without an exception; otherwise false.</returns>
private static bool CanParseGenBankFile(GenBankParser parser, string path)
{
    try
    {
        parser.ParseOne(path);
        return true;
    }
    catch (Exception)
    {
        return false;
    }
}

/// <summary>
/// Downloads <paramref name="url"/> to <paramref name="outFilename"/>. If the first attempt
/// fails, one more attempt is made on the same client with its proxy disabled.
/// </summary>
/// <param name="url">Address of the GenBank record.</param>
/// <param name="outFilename">Path of the file to write.</param>
/// <returns>true if a download attempt succeeded; false if both attempts failed.</returns>
private static bool TryDownloadGenBankRecord(string url, string outFilename)
{
    const int maxAttempts = 2;

    using (WebClient client = new WebClient())
    {
        for (int attempt = 0; attempt < maxAttempts; attempt++)
        {
            try
            {
                client.DownloadFile(url, outFilename);

                // One-second pause after each successful download, kept from the original code
                // (presumably to throttle requests to the server - confirm).
                Thread.Sleep(1000);
                return true;
            }
            catch (Exception)
            {
                // Retry without a proxy on the same client.
                client.Proxy = null;
            }
        }
    }

    return false;
}
/// <summary>
/// Parses the file configured under the simple GenBank XML node with a parser that is explicitly
/// set to the protein alphabet and the NCBI EAA encoding, then compares the parsed sequence
/// properties, the LOCUS/VERSION metadata and the sequence string with the expected values
/// read from the XML configuration.
/// </summary>
public void GenBankParserValidateParseOneWithSpecificFormats()
{
    InitializeXmlVariables();

    // Read every expected value for this scenario from the XML configuration.
    var xmlReader = _utilityObj._xmlUtil;
    var scenarioNode = Constants.SimpleGenBankPrimaryNode;

    FilePath = xmlReader.GetTextValue(scenarioNode, Constants.FilePathNode);
    AlphabetName = xmlReader.GetTextValue(scenarioNode, Constants.AlphabetNameNode);
    MolType = xmlReader.GetTextValue(scenarioNode, Constants.MoleculeTypeNode);
    IsSequenceReadOnly = xmlReader.GetTextValue(scenarioNode, Constants.IsReadOnlyNode);
    SeqId = xmlReader.GetTextValue(scenarioNode, Constants.SequenceIdNode);
    StrandTopology = xmlReader.GetTextValue(scenarioNode, Constants.StrandTopologyNode);
    StrandType = xmlReader.GetTextValue(scenarioNode, Constants.StrandTypeNode);
    Div = xmlReader.GetTextValue(scenarioNode, Constants.DivisionNode);
    Version = xmlReader.GetTextValue(scenarioNode, Constants.VersionNode);
    SequenceDate = xmlReader.GetTextValue(scenarioNode, Constants.DateNode);
    PrimaryId = xmlReader.GetTextValue(scenarioNode, Constants.PrimaryIdNode);
    ExpectedSequence = xmlReader.GetTextValue(scenarioNode, Constants.ExpectedSequenceNode);

    // Parse with an explicitly chosen alphabet and encoding.
    BasicSequenceParser parserObj = new GenBankParser();
    parserObj.Alphabet = Alphabets.Protein;
    parserObj.Encoding = NcbiEAAEncoding.Instance;
    ISequence seq = parserObj.ParseOne(FilePath);

    // Basic sequence properties.
    Assert.AreEqual(Utility.GetAlphabet(AlphabetName), seq.Alphabet);
    Assert.AreEqual(Utility.GetMoleculeType(MolType), seq.MoleculeType);
    Assert.AreEqual(SeqId, seq.DisplayID);
    Assert.AreEqual(SeqId, seq.ID);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Alphabet, Molecular type, Sequence ID and Display ID");

    // Metadata that is awkward to parse and is not covered implicitly by the formatting tests.
    GenBankMetadata metadata = (GenBankMetadata)seq.Metadata["GenBank"];
    var locus = metadata.Locus;

    // The strand is only compared when the record specifies one.
    bool strandSpecified = locus.Strand != SequenceStrandType.None;
    if (strandSpecified)
    {
        Assert.AreEqual(StrandType, locus.Strand.ToString());
    }

    // Topology is compared case-insensitively by upper-casing both sides.
    string expectedTopology = StrandTopology.ToUpper(CultureInfo.CurrentCulture);
    string actualTopology = locus.StrandTopology.ToString().ToUpper(CultureInfo.CurrentCulture);
    Assert.AreEqual(expectedTopology, actualTopology);

    Assert.AreEqual(Div, locus.DivisionCode.ToString());
    Assert.AreEqual(DateTime.Parse(SequenceDate, null), locus.Date);

    var versionInfo = metadata.Version;
    Assert.AreEqual(Version, versionInfo.Version.ToString((IFormatProvider)null));
    Assert.AreEqual(PrimaryId, versionInfo.GINumber);
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the StrandType, StrandTopology, Division, Date, Version, PrimaryID Properties");

    // Finally, the sequence string itself.
    Assert.AreEqual(ExpectedSequence, seq.ToString());
    ApplicationLog.WriteLine(
        "GenBank Parser BVT: Successfully validated the Sequence");

    string consoleSummary = string.Format((IFormatProvider)null,
        "GenBank Parser BVT: Successfully validated the Sequence '{0}'",
        ExpectedSequence);
    Console.WriteLine(consoleSummary);
}
/// <summary>
/// Finds the first HSP of the query's BLAST report (scanning hits in order, then the HSPs of
/// each hit) that passes the user-defined thresholds and whose GenBank record parses and
/// yields a non-null best feature item.
/// </summary>
/// <param name="Up">User parameters: project directory and BLAST thresholds.</param>
/// <param name="seqPos">Number of the query; selects the report file "&lt;ProjectDir&gt;\xml\&lt;seqPos&gt;.xml".</param>
/// <returns>
/// A two-element array holding the hit index and the HSP index of the match,
/// or {-1, -1} when no HSP qualifies.
/// </returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();

    // Iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            // Counts the threshold-passing HSPs already examined for this record, so that at
            // most Up.BlastMaxNumHits of them are considered.
            int hitsProcessed = 0;

            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];

                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity)
                        && (Up.BlastMaxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= Up.BlastMinPercentQueryCoverage)
                        && (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    // Derive the GenBank file name from the GI number in the hit id and the hit range.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString()
                        + "_" + gitem.HitStart.ToString()
                        + "_" + gitem.HitEnd.ToString()
                        + ".gb";

                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);
                        if (bestItem != null)
                        {
                            return new int[] { i, j };
                        }
                    }
                    catch (Exception)
                    {
                        // A record that cannot be read or parsed is skipped; the search continues.
                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                    }

                    hitsProcessed += 1;
                }
            }
        }
    }

    return new int[] { -1, -1 };
}