/// <summary>
/// Scans BLAST results and, for every search record, collects the first HSPs
/// that satisfy the given thresholds as <see cref="GenBankItem"/> instances
/// (GI number plus the HSP's hit start/end coordinates).
/// </summary>
/// <param name="blastResults">Parsed BLAST results to scan.</param>
/// <param name="maxHits">Maximum number of HSPs accepted per search record.</param>
/// <param name="maxEvalue">Largest E-value an accepted HSP may have.</param>
/// <param name="minPercentId">Minimum percent identity (identities / alignment length * 100).</param>
/// <param name="minQueryCoverage">Minimum percentage of the query length spanned by the HSP.</param>
/// <returns>The accepted items, in the order they were encountered.</returns>
public static List<GenBankItem> filter(IList<BlastResult> blastResults, int maxHits = 1, double maxEvalue = 10, double minPercentId = 0.0, double minQueryCoverage = 0.0)
{
    List<GenBankItem> giList = new List<GenBankItem>();

    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int numHits = 0;

            // Both loops stop as soon as the per-record quota is filled, so
            // hits that can no longer contribute are neither scanned nor parsed.
            for (int i = 0; i < record.Hits.Count && numHits < maxHits; i++)
            {
                Hit blastHit = record.Hits[i];

                for (int j = 0; j < blastHit.Hsps.Count && numHits < maxHits; j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((blastHsp.QueryEnd - blastHsp.QueryStart + 1) / (double)record.IterationQueryLength) * 100;

                    // Written as positive comparisons on purpose: a NaN percentage
                    // (zero-length alignment) must fail the test.
                    bool passesThresholds = (percentId >= minPercentId)
                        && (maxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= minQueryCoverage);

                    if (passesThresholds)
                    {
                        // The GI number is the second '|'-separated field of the hit id.
                        // It is parsed only for an accepted HSP, so a hit that is never
                        // selected cannot fail the whole scan with a format error.
                        long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                        giList.Add(new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd));
                        numHits += 1;
                    }
                }
            }
        }
    }

    return giList;
}
/// <summary>
/// Creates the collection items for one query sequence from its BLAST report.
/// One item is added for every HSP that passes the user-defined thresholds
/// (up to <c>Up.BlastMaxNumHits</c> per search record); a search record that
/// yields no such HSP gets a single item filled with default facet values.
/// </summary>
/// <param name="Up">User parameters: project directory, BLAST thresholds, BLAST program and facet categories.</param>
/// <param name="rec">The query sequence; its display id supplies the item name and description.</param>
/// <param name="itemId">Id to assign to the first item created.</param>
/// <param name="seqPos">Position of the query in the input FASTA file; selects the report file <c>xml\{seqPos}.xml</c>.</param>
/// <param name="collection">Collection that receives the created items.</param>
/// <returns>The next unused item id (<paramref name="itemId"/> plus the number of items added).</returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();
    int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;
            int rank = 0;

            // Both loops stop once the per-record hit limit is reached; nothing
            // after that point could be accepted anyway.
            for (int i = 0; i < record.Hits.Count && hitsProcessed < Up.BlastMaxNumHits; i++)
            {
                Hit blastHit = record.Hits[i];

                for (int j = 0; j < blastHit.Hsps.Count && hitsProcessed < Up.BlastMaxNumHits; j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // Positive comparisons on purpose: a NaN percentage must fail.
                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity)
                        && (Up.BlastMaxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= Up.BlastMinPercentQueryCoverage);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    rank += 1;

                    // Compare this HSP's score with the first HSP of the next hit.
                    string nextScore;
                    if ((i + 1) < record.Hits.Count)
                    {
                        nextScore = (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score) ? "less than" : "equal";
                    }
                    else
                    {
                        nextScore = "non existent";
                    }

                    // Parse the GI number from the hit id (second '|'-separated field)
                    // and derive the name of the GenBank file stored for this HSP.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    // Init item.
                    string img = "#" + itemId.ToString();
                    Item item = new Item(itemId, img);
                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                    item.Name = headerTokens[0];
                    item.Description = headerTokens[1];

                    // Write pairwise alignment.
                    writePairwiseAlignment(Up, blastHit, j, itemId);

                    // Position-based parts of the "Annotated" facet; they are the same
                    // whether or not the GenBank record can be read.
                    bool isTopAnnotated = (annotatedIndex[0] == i) && (annotatedIndex[1] == j);
                    bool isTopUnannotated = (i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1);

                    // Try to parse the GenBank record associated with the hit and set
                    // facet values from the BLAST/GenBank data.
                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;

                        // May be null; a null best feature means "unannotated" below.
                        FeatureItem bestItem = getBestFeatureItem(features);

                        string annotatedValue;
                        if (isTopAnnotated)
                        {
                            annotatedValue = "top_annotated";
                        }
                        else if (isTopUnannotated)
                        {
                            annotatedValue = "top_unannotated";
                        }
                        else
                        {
                            annotatedValue = (bestItem != null) ? "annotated" : "unannotated";
                        }

                        // NOTE(review): if CreateFacet throws part-way through this loop,
                        // the facets already added stay on the item and the fallback below
                        // appends a full default set after them - confirm that is intended.
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, nextScore);
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, annotatedValue);
                                    break;
                                default:
                                    facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }
                    }
                    // Best effort: if the GenBank record cannot be read or used, fall back
                    // to default facet values (similar to the 'no hit' item below).
                    catch
                    {
                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        item.Href = "#";

                        string annotatedValue;
                        if (isTopAnnotated)
                        {
                            annotatedValue = "top_annotated";
                        }
                        else if (isTopUnannotated)
                        {
                            annotatedValue = "top_unannotated";
                        }
                        else
                        {
                            annotatedValue = "unannotated";
                        }

                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, "no");
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, annotatedValue);
                                    break;
                                default:
                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }
                    }

                    // Add item to collection and move on to the next item id.
                    collection.Items.Add(item);
                    hitsProcessed += 1;
                    itemId += 1;
                }
            }

            // No hits at all, or none that passed the thresholds: emit one
            // placeholder item carrying default facet values.
            if (hitsProcessed == 0)
            {
                string img = "#" + itemId.ToString();
                Item item = new Item(itemId, img);
                item.Href = "#";
                string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                item.Name = headerTokens[0];
                item.Description = headerTokens[1];

                // Write pairwise alignment to file.
                writePairwiseAlignment(Up, itemId);

                foreach (FacetCategory f in Up.FacetCategories)
                {
                    Facet facet;
                    switch (f.Name)
                    {
                        case "InputOrder":
                            facet = new Facet(f.Name, f.Type, seqPos);
                            break;
                        case "QuerySequence":
                            facet = new Facet(f.Name, f.Type, rec.ToString());
                            break;
                        default:
                            facet = CreateFacet(f.Name, f.Type, record, item, 0);
                            break;
                    }
                    item.Facets.Add(facet);
                }

                collection.Items.Add(item);
                itemId += 1;
            }
        }
    }

    return itemId;
}
/// <summary>
/// Finds the first threshold-passing HSP in a query's BLAST report whose
/// stored GenBank record can be parsed and for which
/// <c>getBestFeatureItem</c> returns a feature.
/// </summary>
/// <param name="Up">User parameters: project directory and BLAST thresholds.</param>
/// <param name="seqPos">Position of the query in the input FASTA file; selects the report file <c>xml\{seqPos}.xml</c>.</param>
/// <returns>
/// A two-element array holding the hit index and the HSP index of that HSP,
/// or <c>{ -1, -1 }</c> when no such HSP exists.
/// </returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
{
    int[] annotatedIndex = { -1, -1 };

    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();

    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;

            // Both loops stop once the per-record hit limit is reached; nothing
            // after that point could be accepted anyway.
            for (int i = 0; i < record.Hits.Count && hitsProcessed < Up.BlastMaxNumHits; i++)
            {
                Hit blastHit = record.Hits[i];

                for (int j = 0; j < blastHit.Hsps.Count && hitsProcessed < Up.BlastMaxNumHits; j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // Positive comparisons on purpose: a NaN percentage must fail.
                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity)
                        && (Up.BlastMaxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= Up.BlastMinPercentQueryCoverage);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    // The GI number is the second '|'-separated field of the hit id;
                    // together with the hit coordinates it names the GenBank file.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);
                        if (bestItem != null)
                        {
                            annotatedIndex[0] = i;
                            annotatedIndex[1] = j;
                            return annotatedIndex;
                        }
                    }
                    // Best effort: a GenBank record that cannot be read simply does
                    // not count as annotated; log it and keep looking.
                    catch
                    {
                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                    }

                    hitsProcessed += 1;
                }
            }
        }
    }

    return annotatedIndex;
}
/// <summary>
/// Scans BLAST results and, for every search record, collects the first HSPs
/// that satisfy the given thresholds as <see cref="GenBankItem"/> instances
/// (GI number plus the HSP's hit start/end coordinates).
/// </summary>
/// <param name="blastResults">Parsed BLAST results to scan.</param>
/// <param name="maxHits">Maximum number of HSPs accepted per search record.</param>
/// <param name="maxEvalue">Largest E-value an accepted HSP may have.</param>
/// <param name="minPercentId">Minimum percent identity (identities / alignment length * 100).</param>
/// <param name="minQueryCoverage">Minimum percentage of the query length spanned by the HSP.</param>
/// <returns>The accepted items, in the order they were encountered.</returns>
public static List<GenBankItem> filter(IList<BlastResult> blastResults, int maxHits = 1, double maxEvalue = 10, double minPercentId = 0.0, double minQueryCoverage = 0.0)
{
    List<GenBankItem> giList = new List<GenBankItem>();

    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int numHits = 0;

            // Both loops stop as soon as the per-record quota is filled, so
            // hits that can no longer contribute are neither scanned nor parsed.
            for (int i = 0; i < record.Hits.Count && numHits < maxHits; i++)
            {
                Hit blastHit = record.Hits[i];

                for (int j = 0; j < blastHit.Hsps.Count && numHits < maxHits; j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((blastHsp.QueryEnd - blastHsp.QueryStart + 1) / (double)record.IterationQueryLength) * 100;

                    // Written as positive comparisons on purpose: a NaN percentage
                    // (zero-length alignment) must fail the test.
                    bool passesThresholds = (percentId >= minPercentId)
                        && (maxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= minQueryCoverage);

                    if (passesThresholds)
                    {
                        // The GI number is the second '|'-separated field of the hit id.
                        // It is parsed only for an accepted HSP, so a hit that is never
                        // selected cannot fail the whole scan with a format error.
                        long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                        giList.Add(new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd));
                        numHits += 1;
                    }
                }
            }
        }
    }

    return giList;
}
/// <summary>
/// Builds the NCBI E-utilities <c>efetch</c> URL used to download the GenBank
/// record for a BLAST hit.
/// </summary>
/// <remarks>
/// For <c>blastn</c>, <c>tblastn</c> and <c>tblastx</c> the record is requested
/// from the nucleotide database in <c>gb</c> format and restricted to the hit
/// range via <c>seq_start</c>/<c>seq_stop</c>. For every other program the
/// whole record is requested from the protein database in <c>gp</c> format.
/// </remarks>
/// <param name="gitem">Hit whose id and coordinates go into the URL.</param>
/// <returns>The efetch URL for <paramref name="gitem"/>.</returns>
private string GetGenbankUrl(GenBankItem gitem)
{
    // E-utilities is served over HTTPS, so request it directly.
    // NOTE(review): the "[email protected]" fragment is reproduced exactly as found;
    // it looks like a redacted "email=" query parameter - confirm the intended value.
    const string efetchBase = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?tool=FetchGenBank&[email protected]";

    bool isNucleotideSearch = (Up.BlastProgram == "blastn")
        || (Up.BlastProgram == "tblastn")
        || (Up.BlastProgram == "tblastx");

    string url;
    if (isNucleotideSearch)
    {
        url = efetchBase + "&db=nucleotide&retmode=text&rettype=gb&id=";
        url += gitem.Id.ToString();
        url += "&seq_start=" + gitem.HitStart;
        url += "&seq_stop=" + gitem.HitEnd;
    }
    else
    {
        url = efetchBase + "&db=protein&retmode=text&rettype=gp&id=";
        url += gitem.Id.ToString();
    }

    return url;
}
/// <summary>
/// Creates the collection items for one query sequence from its BLAST report.
/// One item is added for every HSP that passes the user-defined thresholds
/// (up to <c>Up.BlastMaxNumHits</c> per search record); a search record that
/// yields no such HSP gets a single item filled with default facet values.
/// </summary>
/// <param name="Up">User parameters: project directory, BLAST thresholds, BLAST program and facet categories.</param>
/// <param name="rec">The query sequence; its display id supplies the item name and description.</param>
/// <param name="itemId">Id to assign to the first item created.</param>
/// <param name="seqPos">Position of the query in the input FASTA file; selects the report file <c>xml\{seqPos}.xml</c>.</param>
/// <param name="collection">Collection that receives the created items.</param>
/// <returns>The next unused item id (<paramref name="itemId"/> plus the number of items added).</returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();
    int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;
            int rank = 0;

            // Both loops stop once the per-record hit limit is reached; nothing
            // after that point could be accepted anyway.
            for (int i = 0; i < record.Hits.Count && hitsProcessed < Up.BlastMaxNumHits; i++)
            {
                Hit blastHit = record.Hits[i];

                for (int j = 0; j < blastHit.Hsps.Count && hitsProcessed < Up.BlastMaxNumHits; j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // Positive comparisons on purpose: a NaN percentage must fail.
                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity)
                        && (Up.BlastMaxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= Up.BlastMinPercentQueryCoverage);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    rank += 1;

                    // Compare this HSP's score with the first HSP of the next hit.
                    string nextScore;
                    if ((i + 1) < record.Hits.Count)
                    {
                        nextScore = (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score) ? "less than" : "equal";
                    }
                    else
                    {
                        nextScore = "non existent";
                    }

                    // Parse the GI number from the hit id (second '|'-separated field)
                    // and derive the name of the GenBank file stored for this HSP.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    // Init item.
                    string img = "#" + itemId.ToString();
                    Item item = new Item(itemId, img);
                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                    item.Name = headerTokens[0];
                    item.Description = headerTokens[1];

                    // Write pairwise alignment.
                    writePairwiseAlignment(Up, blastHit, j, itemId);

                    // Position-based parts of the "Annotated" facet; they are the same
                    // whether or not the GenBank record can be read.
                    bool isTopAnnotated = (annotatedIndex[0] == i) && (annotatedIndex[1] == j);
                    bool isTopUnannotated = (i == 0) && (j == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1);

                    // Try to parse the GenBank record associated with the hit and set
                    // facet values from the BLAST/GenBank data.
                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        item.Href = GetNCBIUrl(Up.BlastProgram) + GetGenBankIdentifier(gbRecord);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;

                        // May be null; a null best feature means "unannotated" below.
                        FeatureItem bestItem = getBestFeatureItem(features);

                        string annotatedValue;
                        if (isTopAnnotated)
                        {
                            annotatedValue = "top_annotated";
                        }
                        else if (isTopUnannotated)
                        {
                            annotatedValue = "top_unannotated";
                        }
                        else
                        {
                            annotatedValue = (bestItem != null) ? "annotated" : "unannotated";
                        }

                        // NOTE(review): if CreateFacet throws part-way through this loop,
                        // the facets already added stay on the item and the fallback below
                        // appends a full default set after them - confirm that is intended.
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, nextScore);
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, annotatedValue);
                                    break;
                                default:
                                    facet = CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, GetNCBIUrl(Up.BlastProgram), bestItem, rank);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }
                    }
                    // Best effort: if the GenBank record cannot be read or used, fall back
                    // to default facet values (similar to the 'no hit' item below).
                    catch
                    {
                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        item.Href = "#";

                        string annotatedValue;
                        if (isTopAnnotated)
                        {
                            annotatedValue = "top_annotated";
                        }
                        else if (isTopUnannotated)
                        {
                            annotatedValue = "top_unannotated";
                        }
                        else
                        {
                            annotatedValue = "unannotated";
                        }

                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, "no");
                                    break;
                                case "Annotated":
                                    facet = new Facet(f.Name, f.Type, annotatedValue);
                                    break;
                                default:
                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }
                    }

                    // Add item to collection and move on to the next item id.
                    collection.Items.Add(item);
                    hitsProcessed += 1;
                    itemId += 1;
                }
            }

            // No hits at all, or none that passed the thresholds: emit one
            // placeholder item carrying default facet values.
            if (hitsProcessed == 0)
            {
                string img = "#" + itemId.ToString();
                Item item = new Item(itemId, img);
                item.Href = "#";
                string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                item.Name = headerTokens[0];
                item.Description = headerTokens[1];

                // Write pairwise alignment to file.
                writePairwiseAlignment(Up, itemId);

                foreach (FacetCategory f in Up.FacetCategories)
                {
                    Facet facet;
                    switch (f.Name)
                    {
                        case "InputOrder":
                            facet = new Facet(f.Name, f.Type, seqPos);
                            break;
                        case "QuerySequence":
                            facet = new Facet(f.Name, f.Type, rec.ToString());
                            break;
                        default:
                            facet = CreateFacet(f.Name, f.Type, record, item, 0);
                            break;
                    }
                    item.Facets.Add(facet);
                }

                collection.Items.Add(item);
                itemId += 1;
            }
        }
    }

    return itemId;
}
/// <summary>
/// Finds the first threshold-passing HSP in a query's BLAST report whose
/// stored GenBank record can be parsed and for which
/// <c>getBestFeatureItem</c> returns a feature.
/// </summary>
/// <param name="Up">User parameters: project directory and BLAST thresholds.</param>
/// <param name="seqPos">Position of the query in the input FASTA file; selects the report file <c>xml\{seqPos}.xml</c>.</param>
/// <returns>
/// A two-element array holding the hit index and the HSP index of that HSP,
/// or <c>{ -1, -1 }</c> when no such HSP exists.
/// </returns>
/// <exception cref="FileNotFoundException">The BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int[] GetBestAnnotatedIndex(UIParameters Up, int seqPos)
{
    int[] annotatedIndex = { -1, -1 };

    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();

    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;

            // Both loops stop once the per-record hit limit is reached; nothing
            // after that point could be accepted anyway.
            for (int i = 0; i < record.Hits.Count && hitsProcessed < Up.BlastMaxNumHits; i++)
            {
                Hit blastHit = record.Hits[i];

                for (int j = 0; j < blastHit.Hsps.Count && hitsProcessed < Up.BlastMaxNumHits; j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // Positive comparisons on purpose: a NaN percentage must fail.
                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity)
                        && (Up.BlastMaxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= Up.BlastMinPercentQueryCoverage);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    // The GI number is the second '|'-separated field of the hit id;
                    // together with the hit coordinates it names the GenBank file.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);
                        if (bestItem != null)
                        {
                            annotatedIndex[0] = i;
                            annotatedIndex[1] = j;
                            return annotatedIndex;
                        }
                    }
                    // Best effort: a GenBank record that cannot be read simply does
                    // not count as annotated; log it and keep looking.
                    catch
                    {
                        Console.WriteLine("ISANNOTATED: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                    }

                    hitsProcessed += 1;
                }
            }
        }
    }

    return annotatedIndex;
}