/// <summary>
/// Builds a placeholder facet for a query that has no usable BLAST hit.
/// "QueryName", "QueryLen" and "Rank" are filled from the arguments; every other
/// facet name receives a default value chosen by its facet type.
/// </summary>
/// <param name="fName">Facet category name.</param>
/// <param name="fType">Facet type string; selects the default value for unknown names.</param>
/// <param name="rec">BLAST search record supplying the query length.</param>
/// <param name="item">Item supplying the query name.</param>
/// <param name="rank">Value stored in the "Rank" facet.</param>
/// <returns>The facet for <paramref name="fName"/>.</returns>
public static Facet CreateFacet(string fName, string fType, BlastSearchRecord rec, Item item, int rank)
{
    // Facets whose value is known even without a hit.
    if (fName == "QueryName")
    {
        return new Facet(fName, fType, item.Name);
    }

    if (fName == "QueryLen")
    {
        return new Facet(fName, fType, rec.IterationQueryLength);
    }

    if (fName == "Rank")
    {
        return new Facet(fName, fType, rank);
    }

    // Everything else gets a type-appropriate default.
    switch (fType)
    {
        case "Number":
            return new Facet(fName, fType, 0);

        case "Link":
            return new Facet(fName, fType, "N/A", "about:none.");

        case "DateTime":
            // Date facets are created without any value.
            return new Facet(fName, fType);

        default:
            return new Facet(fName, fType, "No Hit");
    }
}
/// <summary>
/// Builds the facet named <paramref name="fName"/> for a single BLAST HSP, drawing values
/// from the BLAST record and from the GenBank metadata attached to <paramref name="gb"/>.
/// </summary>
/// <param name="fName">Facet category name; selects which value is produced.</param>
/// <param name="fType">Facet type string, passed through to the Facet constructor.</param>
/// <param name="rec">BLAST search record that contains the hit.</param>
/// <param name="hitId">Index into <c>rec.Hits</c>.</param>
/// <param name="hspId">Index into the selected hit's <c>Hsps</c>.</param>
/// <param name="gb">Sequence whose <c>Metadata["GenBank"]</c> entry holds the GenBank metadata.</param>
/// <param name="item">Item supplying the query name and the id used in the alignment file link.</param>
/// <param name="NCBIurl">URL prefix prepended to the GI number / accession.</param>
/// <param name="bestItem">Feature used for gene, product and function lookups; may be null.</param>
/// <param name="rank">Value stored in the "Rank" facet.</param>
/// <returns>The populated facet.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="fName"/> is not a known facet name.</exception>
public static Facet CreateFacet(string fName, string fType, BlastSearchRecord rec, int hitId, int hspId, ISequence gb, Item item, string NCBIurl, FeatureItem bestItem, int rank)
{
    Hit hit = rec.Hits[hitId];
    Hsp hsp = hit.Hsps[hspId];
    GenBankMetadata gbMeta = (GenBankMetadata)gb.Metadata["GenBank"];

    switch (fName)
    {
        case "QueryName":
            return new Facet(fName, fType, item.Name);

        case "QueryLen":
            return new Facet(fName, fType, rec.IterationQueryLength);

        case "Rank":
            return new Facet(fName, fType, rank);

        case "Score":
            return new Facet(fName, fType, Math.Round(hsp.BitScore, 1));

        case "Identity":
            // Identical positions as a percentage of the alignment length.
            double percentIdentity = (hsp.IdentitiesCount / (double)hsp.AlignmentLength) * 100.0;
            return new Facet(fName, fType, Math.Round(percentIdentity, 0));

        case "Span":
            // Aligned query range as a percentage of the query length.
            double percentSpan = ((hsp.QueryEnd - hsp.QueryStart + 1) / (double)rec.IterationQueryLength) * 100.0;
            return new Facet(fName, fType, Math.Round(percentSpan, 0));

        case "SubjStart":
            double subjStart = hsp.HitStart;
            return new Facet(fName, fType, Math.Round(subjStart, 0));

        case "SubjLen":
            double subjLen = hit.Length;
            return new Facet(fName, fType, Math.Round(subjLen, 0));

        case "Strand":
            string strand = FrameToStrand(hsp.QueryFrame) + "/" + FrameToStrand(hsp.HitFrame);
            return new Facet(fName, fType, strand);

        case "Species":
            // Keep only the first word of the species field.
            string species = gbMeta.Source.Organism.Species;
            int firstSpace = species.IndexOf(" ", StringComparison.Ordinal);
            if (firstSpace > 0)
            {
                species = species.Substring(0, firstSpace);
            }
            return new Facet(fName, fType, gbMeta.Source.Organism.Genus + " " + species);

        // The taxonomy facets are positional entries of the ';'-separated lineage.
        case "Kingdom":
            return new Facet(fName, fType, GetTaxonomyLevelOrDefault(gbMeta, 0));

        case "Phylum":
            return new Facet(fName, fType, GetTaxonomyLevelOrDefault(gbMeta, 1));

        case "Class":
            return new Facet(fName, fType, GetTaxonomyLevelOrDefault(gbMeta, 2));

        case "Order":
            return new Facet(fName, fType, GetTaxonomyLevelOrDefault(gbMeta, 3));

        case "Family":
            return new Facet(fName, fType, GetTaxonomyLevelOrDefault(gbMeta, 4));

        case "Lineage":
            return new Facet(fName, fType, gbMeta.Source.Organism.ClassLevels.ToString());

        case "Organism":
            return new Facet(fName, fType, gbMeta.Source.CommonName);

        case "Genus":
            // Last lineage entry, without surrounding whitespace or the trailing period.
            return new Facet(fName, fType, gbMeta.Source.Organism.ClassLevels.Split(';').Last().Trim().TrimEnd('.'));

        case "Gene":
            return CreateGeneFacet(fName, fType, bestItem);

        case "GI":
            return new Facet(fName, fType, gbMeta.Version.GINumber, NCBIurl + gbMeta.Version.GINumber);

        case "Accession":
            return new Facet(fName, fType, gbMeta.Version.CompoundAccession, NCBIurl + gbMeta.Version.CompoundAccession);

        case "Definition":
            return new Facet(fName, fType, gbMeta.Definition);

        case "EValue":
            return new Facet(fName, fType, String.Format("{0:#e+00}", hsp.EValue));

        case "AlignLen":
            // Links to the pairwise alignment text file named after the item id.
            return new Facet(fName, fType, hsp.AlignmentLength, @"txt\" + item.Id + ".txt");

        case "RefCount":
            // Count references, ignoring direct submissions and unpublished entries.
            int publishedCount = 0;
            foreach (CitationReference r in gbMeta.References)
            {
                if ((r.Title != "Direct Submission") && (r.Journal != "Unpublished"))
                {
                    publishedCount++;
                }
            }
            return new Facet(fName, fType, publishedCount);

        case "References":
            return CreateReferencesFacet(fName, fType, gbMeta);

        case "SubmissionDate":
            // Build the date as UTC directly. Calling ToUniversalTime() on an
            // unspecified-kind DateTime treats it as local time, which shifts the
            // emitted date by the machine's UTC offset.
            DateTime submitted = new DateTime(gbMeta.Locus.Date.Year, gbMeta.Locus.Date.Month, gbMeta.Locus.Date.Day, 0, 0, 0, DateTimeKind.Utc);
            return new Facet(fName, fType, submitted.ToString("o"));

        case "Product":
            return CreateProteinQualifierFacet(fName, fType, "product", gbMeta, bestItem);

        case "Function":
            return CreateProteinQualifierFacet(fName, fType, "function", gbMeta, bestItem);

        default:
            throw new ArgumentException("Facet category with name = " + fName + " does not exist.");
    }
}

// Returns the trimmed lineage entry at the given zero-based position, or "N/A" when the lineage is shorter.
private static string GetTaxonomyLevelOrDefault(GenBankMetadata gbMeta, int level)
{
    string[] classLevels = gbMeta.Source.Organism.ClassLevels.Split(';');
    return classLevels.Length > level ? classLevels[level].Trim() : "N/A";
}

// Builds the "Gene" facet: the gene symbol linked to an NCBI gene search, or an unlinked "N/A" when no symbol is available.
private static Facet CreateGeneFacet(string fName, string fType, FeatureItem bestItem)
{
    string geneSym = null;
    if (bestItem != null)
    {
        foreach (KeyValuePair<string, List<String>> qualifier in bestItem.Qualifiers)
        {
            if (qualifier.Key == "gene" && qualifier.Value != null && qualifier.Value.Count > 0 && qualifier.Value[0] != null)
            {
                // Qualifier values may be wrapped in double quotes.
                geneSym = qualifier.Value[0].Trim('"');
            }
        }
    }

    if (String.IsNullOrEmpty(geneSym))
    {
        // No gene symbol: do not emit a search link for the placeholder text.
        return new Facet(fName, fType, "N/A", null);
    }

    string url = System.Web.HttpUtility.HtmlEncode("http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=search&db=gene&term=" + geneSym + "%5Bsym%5D");
    return new Facet(fName, fType, geneSym, url);
}

// Builds the "References" facet with one numbered, linked value per reference that is neither a direct submission nor unpublished.
private static Facet CreateReferencesFacet(string fName, string fType, GenBankMetadata gbMeta)
{
    Facet facet = new Facet(fName, fType);
    int number = 1;
    foreach (CitationReference r in gbMeta.References)
    {
        if (r.Title != "Direct Submission" && (r.Journal != "Unpublished"))
        {
            string url = CreateReferenceURL(r);
            facet.Add(new FacetValue(facet.Type, String.Format("{0}. {1}. {2}.", number, r.Title, r.Journal), url));
            number++;
        }
    }
    return facet;
}

// Builds a facet from the "Protein" qualifier of the GenBank record, falling back to the same qualifier on bestItem when the record yields "N/A".
private static Facet CreateProteinQualifierFacet(string fName, string fType, string qualifierName, GenBankMetadata gbMeta, FeatureItem bestItem)
{
    Facet facet = new Facet(fName, fType, GetQualifierString("Protein", qualifierName, gbMeta));
    if (facet[0].Value == "N/A" && bestItem != null)
    {
        facet = new Facet(fName, fType, GetQualifierStringFromCDS(bestItem, qualifierName));
    }
    return facet;
}
/// <summary>
/// Creates collection items for one query sequence from its BLAST report.
/// Every HSP that passes the thresholds in <paramref name="Up"/> becomes one item whose
/// facets come from the BLAST record and the matching GenBank file. A record with no
/// accepted HSP produces a single item with default facet values.
/// </summary>
/// <param name="Up">Settings: project directory, BLAST thresholds, BLAST program and facet categories.</param>
/// <param name="rec">Query sequence; its DisplayID is parsed for the item name and description.</param>
/// <param name="itemId">Id assigned to the first item created by this call.</param>
/// <param name="seqPos">Position of the query in the input; selects the BLAST report file name.</param>
/// <param name="collection">Collection that receives the new items.</param>
/// <returns>The next unused item id.</returns>
/// <exception cref="FileNotFoundException">Thrown when the BLAST report for <paramref name="seqPos"/> does not exist.</exception>
public static int CreateItems(UIParameters Up, ISequence rec, int itemId, int seqPos, Collection collection)
{
    // BLAST reports are saved in individual files by query and
    // numbered in the same order as they appear in the input FASTA file.
    string blastFile = Up.ProjectDir + "\\xml\\" + seqPos + ".xml";
    if (!File.Exists(blastFile))
    {
        throw new FileNotFoundException("File does not exist.", blastFile);
    }

    BlastXmlParser blastParser = new BlastXmlParser();
    IList<BlastResult> blastResults = blastParser.Parse(blastFile);
    GenBankParser gbParser = new GenBankParser();
    int[] annotatedIndex = GetBestAnnotatedIndex(Up, seqPos);

    // Iterate through the BLAST results.
    foreach (BlastResult blastResult in blastResults)
    {
        foreach (BlastSearchRecord record in blastResult.Records)
        {
            int hitsProcessed = 0;
            int rank = 0;

            // For each hit
            for (int i = 0; i < record.Hits.Count(); i++)
            {
                Hit blastHit = record.Hits[i];

                // For each HSP
                for (int j = 0; j < blastHit.Hsps.Count(); j++)
                {
                    Hsp blastHsp = blastHit.Hsps[j];
                    double percentId = (blastHsp.IdentitiesCount / (double)blastHsp.AlignmentLength) * 100;
                    double queryCoverage = ((double)(blastHsp.QueryEnd - blastHsp.QueryStart + 1) / record.IterationQueryLength) * 100;

                    // Skip HSPs that fail the user-defined thresholds.
                    bool passesThresholds = (percentId >= Up.BlastMinPercentIdentity)
                        && (Up.BlastMaxEvalue >= blastHsp.EValue)
                        && (queryCoverage >= Up.BlastMinPercentQueryCoverage)
                        && (hitsProcessed < Up.BlastMaxNumHits);
                    if (!passesThresholds)
                    {
                        continue;
                    }

                    rank += 1;

                    // Compare this HSP's score with the first HSP of the next hit.
                    string nextScore;
                    if ((i + 1) < record.Hits.Count())
                    {
                        if (blastHsp.Score > record.Hits[i + 1].Hsps[0].Score)
                        {
                            nextScore = "less than";
                        }
                        else
                        {
                            nextScore = "equal";
                        }
                    }
                    else
                    {
                        nextScore = "non existent";
                    }

                    // Parse the GI number from the hit id (second '|'-separated token)
                    // and derive the GenBank file name from it and the hit range.
                    long gi = Convert.ToInt64(blastHit.Id.Split('|')[1]);
                    GenBankItem gitem = new GenBankItem(gi, blastHsp.HitStart, blastHsp.HitEnd);
                    string gbFile = Up.ProjectDir + "\\gb\\" + gitem.Id.ToString();
                    gbFile += "_" + gitem.HitStart.ToString();
                    gbFile += "_" + gitem.HitEnd.ToString();
                    gbFile += ".gb";

                    // Init item
                    string img = "#" + itemId.ToString();
                    Item item = new Item(itemId, img);
                    string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                    item.Name = headerTokens[0];
                    item.Description = headerTokens[1];

                    // Write pairwise alignment
                    writePairwiseAlignment(Up, blastHit, j, itemId);

                    // Try to parse the GB record associated with the hit and set facet
                    // values to data from the BLAST/GB record.
                    try
                    {
                        Console.WriteLine("GB OK: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        ISequence gbRecord = gbParser.ParseOne(gbFile);
                        string ncbiUrl = GetNCBIUrl(Up.BlastProgram);
                        item.Href = ncbiUrl + GetGenBankIdentifier(gbRecord);
                        GenBankMetadata gbMeta = (GenBankMetadata)gbRecord.Metadata["GenBank"];
                        IList<FeatureItem> features = gbMeta.Features.All;
                        FeatureItem bestItem = getBestFeatureItem(features);

                        // Build all facets first and attach them only once every one has
                        // been created, so a failure part-way through cannot leave partial
                        // facets that the fallback below would then duplicate.
                        List<Facet> facets = new List<Facet>();
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facets.Add(new Facet(f.Name, f.Type, seqPos));
                                    break;
                                case "QuerySequence":
                                    facets.Add(new Facet(f.Name, f.Type, rec.ToString()));
                                    break;
                                case "NextScore":
                                    facets.Add(new Facet(f.Name, f.Type, nextScore));
                                    break;
                                case "Annotated":
                                    facets.Add(new Facet(f.Name, f.Type, GetAnnotationStatus(annotatedIndex, i, j, bestItem != null)));
                                    break;
                                default:
                                    facets.Add(CreateFacet(f.Name, f.Type, record, i, j, gbRecord, item, ncbiUrl, bestItem, rank));
                                    break;
                            }
                        }

                        foreach (Facet facet in facets)
                        {
                            item.Facets.Add(facet);
                        }
                    }
                    // If parsing failed, init the item with default values (similar to the 'no hit' case).
                    catch (Exception ex)
                    {
                        Console.WriteLine("GB ERROR: " + record.Hits[0].Id + " " + i.ToString() + " " + j.ToString());
                        // Report why the GenBank data was unusable instead of discarding the cause.
                        Console.WriteLine("GB ERROR detail: " + ex.GetType().Name + ": " + ex.Message);
                        item.Href = "#";
                        foreach (FacetCategory f in Up.FacetCategories)
                        {
                            Facet facet;
                            switch (f.Name)
                            {
                                case "InputOrder":
                                    facet = new Facet(f.Name, f.Type, seqPos);
                                    break;
                                case "QuerySequence":
                                    facet = new Facet(f.Name, f.Type, rec.ToString());
                                    break;
                                case "NextScore":
                                    facet = new Facet(f.Name, f.Type, "no");
                                    break;
                                case "Annotated":
                                    // No feature information is available on this path.
                                    facet = new Facet(f.Name, f.Type, GetAnnotationStatus(annotatedIndex, i, j, false));
                                    break;
                                default:
                                    facet = CreateGBErrorFacet(f.Name, f.Type, record, i, j, item, GetNCBIUrl(Up.BlastProgram), rank);
                                    break;
                            }
                            item.Facets.Add(facet);
                        }
                    }

                    // Add item to collection, increment to next item.
                    collection.Items.Add(item);
                    hitsProcessed += 1;
                    itemId += 1;
                }
            }

            // No hits at all, or none passed the thresholds: emit one default item.
            if ((record.Hits.Count()) == 0 || (hitsProcessed == 0))
            {
                // Init Pivot item
                string img = "#" + itemId.ToString();
                Item item = new Item(itemId, img);
                item.Href = "#";
                string[] headerTokens = parseFastaHeader(rec.DisplayID.ToString());
                item.Name = headerTokens[0];
                item.Description = headerTokens[1];

                // Write pairwise alignment to file.
                writePairwiseAlignment(Up, itemId);

                // Set facet values for each facet category to default values.
                foreach (FacetCategory f in Up.FacetCategories)
                {
                    Facet facet;
                    switch (f.Name)
                    {
                        case "InputOrder":
                            facet = new Facet(f.Name, f.Type, seqPos);
                            break;
                        case "QuerySequence":
                            facet = new Facet(f.Name, f.Type, rec.ToString());
                            break;
                        default:
                            facet = CreateFacet(f.Name, f.Type, record, item, 0);
                            break;
                    }
                    item.Facets.Add(facet);
                }

                // Add item to collection, increment to next item.
                collection.Items.Add(item);
                itemId += 1;
            }
        }
    }

    return itemId;
}

// Classifies the HSP at (hitIndex, hspIndex) for the "Annotated" facet, relative to the best annotated hit/HSP pair in annotatedIndex.
private static string GetAnnotationStatus(int[] annotatedIndex, int hitIndex, int hspIndex, bool hasBestFeature)
{
    if ((annotatedIndex[0] == hitIndex) && (annotatedIndex[1] == hspIndex))
    {
        return "top_annotated";
    }

    // (-1, -1) is handled as "no annotated pair"; the first HSP of the first hit is then the top entry.
    if ((hitIndex == 0) && (hspIndex == 0) && (annotatedIndex[0] == -1) && (annotatedIndex[1] == -1))
    {
        return "top_unannotated";
    }

    return hasBestFeature ? "annotated" : "unannotated";
}