/// <summary>
/// Starts a task that compares locally stored nucleotide sequences with their current remote summaries.
/// Every sequence with <c>Notation.Nucleotides</c> and a non-empty <c>RemoteId</c> is looked up
/// through <c>NcbiHelper</c> (EPost followed by ESummary), and for each one the local and remote
/// version, update date and name are reported.
/// </summary>
/// <param name="reinmportSequences">
/// Re-import is not implemented: when <c>true</c> the task body throws <see cref="NotImplementedException"/>.
/// </param>
/// <returns>
/// The result of <c>CreateTask</c>. The task itself yields a dictionary whose <c>"data"</c> entry is the
/// JSON serialization of <c>{ "results": [...] }</c>, ordered by version difference (descending),
/// then by <c>Updated</c>, then by <c>NameUpdated</c>.
/// </returns>
public ActionResult Index(bool reinmportSequences)
{
    return CreateTask(() =>
    {
        if (reinmportSequences)
        {
            throw new NotImplementedException();
        }

        // Local state of every sequence, keyed by accession without the version suffix.
        Dictionary<string, AccessionUpdateSearchResult> sequencesData;
        using (var db = new LibiadaWebEntities())
        {
            sequencesData = db.DnaSequence
                .Include(ds => ds.Matter)
                .Where(ds => ds.Notation == Notation.Nucleotides && !string.IsNullOrEmpty(ds.RemoteId))
                .ToArray()
                .ToDictionary(
                    s => s.RemoteId.Split('.')[0],
                    s => new AccessionUpdateSearchResult()
                    {
                        LocalAccession = s.RemoteId,
                        LocalVersion = Convert.ToByte(s.RemoteId.Split('?')[0].Split('.')[1]),
                        Name = s.Matter.Name.Split('|')[0].Trim(),
                        LocalUpdateDate = s.Matter.Modified.ToString(OutputFormats.DateFormat),
                        LocalUpdateDateTime = s.Matter.Modified
                    });
        }

        var searchResults = new List<NuccoreObject>();

        // slicing accessions into chunks to prevent "too long request" error
        string[] accessions = sequencesData.Keys.ToArray();
        const int maxChunkSize = 10000;
        for (int i = 0; i < accessions.Length; i += maxChunkSize)
        {
            int actualChunkSize = Math.Min(maxChunkSize, accessions.Length - i);

            // Join the slice directly instead of copying it into a temporary array first.
            string accessionsChunk = string.Join(",", accessions, i, actualChunkSize);
            (string ncbiWebEnvironment, string queryKey) = NcbiHelper.ExecuteEPostRequest(accessionsChunk);
            searchResults.AddRange(NcbiHelper.ExecuteESummaryRequest(ncbiWebEnvironment, queryKey, true));
        }

        foreach (var searchResult in searchResults)
        {
            // Strip the trailing period and the common descriptive suffixes from the remote title.
            var title = searchResult.Title.TrimEnd(".")
                .TrimEnd(", complete genome")
                .TrimEnd(", complete sequence")
                .TrimEnd(", complete CDS")
                .TrimEnd(", complete cds")
                .TrimEnd(", genome");

            string[] newAccession = searchResult.AccessionVersion.Split('.');

            // A remote record whose accession was not requested is skipped so that a single
            // unexpected record does not fail the whole task; its absence leaves no entry changed.
            if (!sequencesData.TryGetValue(newAccession[0], out var sequenceData))
            {
                continue;
            }

            sequenceData.RemoteVersion = Convert.ToByte(newAccession[1]);
            sequenceData.RemoteName = title;
            sequenceData.RemoteOrganism = searchResult.Organism;
            sequenceData.RemoteUpdateDate = searchResult.UpdateDate.ToString(OutputFormats.DateFormat);

            // Flagged as updated when the remote record is not older than the local one
            // or when it carries a higher version number.
            sequenceData.Updated = sequenceData.LocalUpdateDateTime <= searchResult.UpdateDate
                                || sequenceData.RemoteVersion > sequenceData.LocalVersion;

            // The local name is expected to contain both the remote title and the remote organism.
            sequenceData.NameUpdated = !(sequenceData.Name.Contains(title)
                                      && sequenceData.Name.Contains(searchResult.Organism));
        }

        var result = new Dictionary<string, object>
        {
            {
                "results",
                sequencesData.Values
                    .OrderByDescending(r => r.RemoteVersion - r.LocalVersion)
                    .ThenBy(r => r.Updated)
                    .ThenBy(r => r.NameUpdated)
            }
        };

        return new Dictionary<string, string> { { "data", JsonConvert.SerializeObject(result) } };
    });
}
/// <summary>
/// Starts a task that runs a search through <c>NcbiHelper</c>, downloads every found accession
/// that is not stored locally yet and saves it as a new matter with a nucleotide sequence.
/// A failure on one accession is caught and reported in that accession's result; it does not stop the import.
/// </summary>
/// <param name="searchQuery">Raw search query; formatted with <c>NcbiHelper.FormatNcbiSearchTerm</c>.</param>
/// <param name="importGenes">When <c>true</c>, <c>ImportFeatures</c> is called for each imported sequence.</param>
/// <param name="importPartial">
/// Forwarded to <c>NcbiHelper.ExecuteESummaryRequest</c>; presumably controls whether partial
/// sequences are kept in the search results (confirm against the helper).
/// </param>
/// <param name="filterMinLength">Whether <paramref name="minLength"/> is applied to the search term.</param>
/// <param name="minLength">Minimum length; ignored unless <paramref name="filterMinLength"/> is <c>true</c>.</param>
/// <param name="filterMaxLength">Whether <paramref name="maxLength"/> is applied to the search term.</param>
/// <param name="maxLength">Maximum length; ignored unless <paramref name="filterMaxLength"/> is <c>true</c>.</param>
/// <returns>
/// The result of <c>CreateTask</c>. The task itself yields a dictionary whose <c>"data"</c> entry is the
/// JSON serialization of <c>{ "result": [...] }</c> with one import result per accession.
/// </returns>
public ActionResult Index(
    string searchQuery,
    bool importGenes,
    bool importPartial,
    bool filterMinLength,
    int minLength,
    bool filterMaxLength,
    int maxLength)
{
    return CreateTask(() =>
    {
        string searchTerm;
        if (filterMinLength)
        {
            searchTerm = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength);
        }
        else
        {
            // With only the upper bound requested, the lower bound is passed explicitly as 1.
            searchTerm = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength: 1, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery);
        }

        List<NuccoreObject> nuccoreObjects = NcbiHelper.ExecuteESummaryRequest(searchTerm, importPartial);

        // Accessions without the version suffix, de-duplicated.
        string[] accessions = nuccoreObjects
            .Select(no => no.AccessionVersion.Split('.')[0])
            .Distinct()
            .ToArray();
        var importResults = new List<MatterImportResult>(accessions.Length);

        using (var db = new LibiadaWebEntities())
        {
            var matterRepository = new MatterRepository(db);
            var dnaSequenceRepository = new GeneticSequenceRepository(db);

            var (existingAccessions, accessionsToImport) =
                dnaSequenceRepository.SplitAccessionsIntoExistingAndNotImported(accessions);

            importResults.AddRange(existingAccessions.ConvertAll(existingAccession => new MatterImportResult
            {
                MatterName = existingAccession,
                Result = "Sequence already exists",
                Status = "Exists"
            }));

            foreach (string accession in accessionsToImport)
            {
                // MatterName starts as the bare accession and is refined as more data becomes
                // available, so a failure at any stage still yields an identifiable result.
                var importResult = new MatterImportResult() { MatterName = accession };

                try
                {
                    ISequence bioSequence = NcbiHelper.DownloadGenBankSequence(accession);
                    GenBankMetadata metadata = NcbiHelper.GetMetadata(bioSequence);
                    importResult.MatterName = metadata.Version.CompoundAccession;

                    Matter matter = matterRepository.CreateMatterFromGenBankMetadata(metadata);

                    importResult.SequenceType = matter.SequenceType.GetDisplayValue();
                    importResult.Group = matter.Group.GetDisplayValue();
                    importResult.MatterName = matter.Name;
                    importResult.AllNames = $"Common name = {metadata.Source.CommonName}, "
                                          + $"Species = {metadata.Source.Organism.Species}, "
                                          + $"Definition = {metadata.Definition}, "
                                          + $"Saved matter name = {importResult.MatterName}";

                    var sequence = new CommonSequence
                    {
                        Matter = matter,
                        Notation = Notation.Nucleotides,
                        RemoteDb = RemoteDb.GenBank,
                        RemoteId = metadata.Version.CompoundAccession
                    };

                    // Ordinal, case-insensitive search: lower-casing with the current culture
                    // can miss the word under culture-specific casing rules (e.g. Turkish "I").
                    bool partial = metadata.Definition.IndexOf("partial", StringComparison.OrdinalIgnoreCase) >= 0;
                    dnaSequenceRepository.Create(sequence, bioSequence, partial);

                    (importResult.Result, importResult.Status) = importGenes
                        ? ImportFeatures(metadata, sequence)
                        : ("Successfully imported sequence", "Success");
                }
                catch (Exception exception)
                {
                    importResult.Status = "Error";
                    importResult.Result = $"Error: {exception.Message}";

                    // Append the messages of the whole inner-exception chain.
                    for (Exception inner = exception.InnerException; inner != null; inner = inner.InnerException)
                    {
                        importResult.Result += $" {inner.Message}";
                    }

                    // Detach everything tracked by the context so the failed entities are not
                    // carried into the next iteration. The entries are copied first so that
                    // changing their state cannot interfere with the enumeration.
                    foreach (var dbEntityEntry in db.ChangeTracker.Entries().ToArray())
                    {
                        if (dbEntityEntry.Entity != null)
                        {
                            dbEntityEntry.State = EntityState.Detached;
                        }
                    }
                }
                finally
                {
                    importResults.Add(importResult);
                }
            }

            string[] names = importResults.Select(r => r.MatterName).ToArray();

            // removing matters for which adding of sequence failed
            Matter[] orphanMatters = db.Matter
                .Include(m => m.Sequence)
                .Where(m => names.Contains(m.Name) && m.Sequence.Count == 0)
                .ToArray();

            if (orphanMatters.Length > 0)
            {
                db.Matter.RemoveRange(orphanMatters);
                db.SaveChanges();
            }
        }

        var result = new Dictionary<string, object> { { "result", importResults } };

        return new Dictionary<string, string> { { "data", JsonConvert.SerializeObject(result) } };
    });
}
/// <summary>
/// Starts a task that runs a search through <c>NcbiHelper</c> and reports, without importing anything,
/// which found sequences are new, which already exist locally and, when
/// <paramref name="importPartial"/> is <c>false</c>, which were filtered out of the search results.
/// </summary>
/// <param name="searchQuery">Raw search query; formatted with <c>NcbiHelper.FormatNcbiSearchTerm</c>.</param>
/// <param name="importPartial">
/// Forwarded to <c>NcbiHelper.ExecuteESummaryRequest</c>. When <c>false</c>, a second request is made
/// with <c>true</c> and the difference between the two result sets is reported as "Filtered out".
/// </param>
/// <param name="filterMinLength">Whether <paramref name="minLength"/> is applied to the search term.</param>
/// <param name="minLength">Minimum length; ignored unless <paramref name="filterMinLength"/> is <c>true</c>.</param>
/// <param name="filterMaxLength">Whether <paramref name="maxLength"/> is applied to the search term.</param>
/// <param name="maxLength">Maximum length; ignored unless <paramref name="filterMaxLength"/> is <c>true</c>.</param>
/// <returns>
/// The result of <c>CreateTask</c>. The task itself yields a dictionary whose <c>"data"</c> entry is the
/// JSON serialization of <c>{ "result": [...], "accessions": [...] }</c>, where <c>"accessions"</c>
/// holds the versioned accessions of the new sequences.
/// </returns>
public ActionResult Index(
    string searchQuery,
    bool importPartial,
    bool filterMinLength,
    int minLength,
    bool filterMaxLength,
    int maxLength)
{
    return CreateTask(() =>
    {
        // Accession without the version suffix ("AB123456.1" -> "AB123456").
        string BaseAccession(NuccoreObject nuccoreObject) => nuccoreObject.AccessionVersion.Split('.')[0];

        string searchTerm;
        if (filterMinLength)
        {
            searchTerm = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength);
        }
        else
        {
            searchTerm = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery);
        }

        List<NuccoreObject> searchResults = NcbiHelper.ExecuteESummaryRequest(searchTerm, importPartial);

        // Stays empty when nothing can have been filtered out (importPartial == true).
        var filteredOutSearchResults = new List<NuccoreObject>();
        string[] accessions;

        if (!importPartial)
        {
            List<NuccoreObject> unfilteredSearchResults = NcbiHelper.ExecuteESummaryRequest(searchTerm, true);

            // NOTE(review): Except uses the default equality of NuccoreObject - confirm that it
            // compares by value, otherwise results of two separate requests never match.
            filteredOutSearchResults = unfilteredSearchResults.Except(searchResults).ToList();
            accessions = unfilteredSearchResults.Select(BaseAccession).Distinct().ToArray();
        }
        else
        {
            accessions = searchResults.Select(BaseAccession).Distinct().ToArray();
        }

        var results = new List<MatterImportResult>(accessions.Length);

        string[] existingAccessions;
        using (var db = new LibiadaWebEntities())
        {
            var dnaSequenceRepository = new GeneticSequenceRepository(db);
            (existingAccessions, _) = dnaSequenceRepository.SplitAccessionsIntoExistingAndNotImported(accessions);
        }

        // Hash set for constant-time membership checks instead of scanning the array per result.
        var existingAccessionsSet = new HashSet<string>(existingAccessions);

        List<NuccoreObject> newSearchResults = searchResults
            .Where(sr => !existingAccessionsSet.Contains(BaseAccession(sr)))
            .ToList();

        results.AddRange(newSearchResults.Select(searchResult => new MatterImportResult()
        {
            MatterName = $"{searchResult.Title} | {searchResult.AccessionVersion}",
            Result = "Found new sequence",
            Status = "Success"
        }));

        results.AddRange(existingAccessions.ConvertAll(existingAccession => new MatterImportResult
        {
            MatterName = existingAccession,
            Result = "Sequence already exists",
            Status = "Exists"
        }));

        // Filtered-out sequences that already exist locally are reported as existing only.
        results.AddRange(filteredOutSearchResults
            .Where(sr => !existingAccessionsSet.Contains(BaseAccession(sr)))
            .Select(filteredOutSearchResult => new MatterImportResult()
            {
                MatterName = $"{filteredOutSearchResult.Title} | {filteredOutSearchResult.AccessionVersion}",
                Result = "Filtered out",
                Status = "Error"
            }));

        string[] newAccessions = newSearchResults.Select(sr => sr.AccessionVersion).ToArray();

        var result = new Dictionary<string, object>
        {
            { "result", results },
            { "accessions", newAccessions }
        };

        return new Dictionary<string, string> { { "data", JsonConvert.SerializeObject(result) } };
    });
}