/// <summary>
/// Applies the given chain of order transformations to each input sequence
/// the requested number of times and returns the transformed sequences
/// serialized as JSON.
/// </summary>
/// <param name="transformationsSequence">Transformations applied in order on every iteration.</param>
/// <param name="iterationsCount">How many times the whole transformation chain is applied.</param>
/// <param name="customSequences">Raw sequences typed by the user (used when <paramref name="localFile"/> is false).</param>
/// <param name="localFile">If true, sequences are read from the uploaded fasta files instead.</param>
/// <param name="file">Uploaded fasta files (used when <paramref name="localFile"/> is true).</param>
public ActionResult Index(
    OrderTransformation[] transformationsSequence,
    int iterationsCount,
    string[] customSequences,
    bool localFile,
    HttpPostedFileBase[] file)
{
    return CreateTask(() =>
    {
        // Input count comes either from the uploaded files or the custom strings.
        int sequencesCount = localFile ? Request.Files.Count : customSequences.Length;
        var sourceSequences = new string[sequencesCount];
        var sequences = new Chain[sequencesCount];
        var names = new string[sequencesCount];

        // Collect raw sequence text and a display name for each input.
        for (int i = 0; i < sequencesCount; i++)
        {
            if (localFile)
            {
                var sequenceStream = FileHelper.GetFileStream(file[i]);
                var fastaSequence = NcbiHelper.GetFastaSequence(sequenceStream);
                sourceSequences[i] = fastaSequence.ConvertToString();
                names[i] = fastaSequence.ID;
            }
            else
            {
                sourceSequences[i] = customSequences[i];
                names[i] = $"Custom sequence {i + 1}. Length: {customSequences[i].Length}";
            }
        }

        // Transform every sequence: the full transformation chain is applied
        // iterationsCount times, feeding each result into the next step.
        for (int index = 0; index < sequencesCount; index++)
        {
            Chain current = new Chain(sourceSequences[index]);
            for (int iteration = 0; iteration < iterationsCount; iteration++)
            {
                foreach (OrderTransformation transformation in transformationsSequence)
                {
                    current = transformation == OrderTransformation.Dissimilar
                                  ? DissimilarChainFactory.Create(current)
                                  : HighOrderFactory.Create(current, EnumExtensions.GetLink(transformation));
                }
            }

            sequences[index] = current;
        }

        var transformations = transformationsSequence.Select(ts => ts.GetDisplayValue());

        var result = new Dictionary<string, object>
        {
            { "names", names },
            { "sequences", sequences.Select((s, i) => new { name = names[i], value = s.ToString(" ") }).ToArray() },
            { "transformationsList", transformations },
            { "iterationsCount", iterationsCount }
        };

        return new Dictionary<string, string> { { "data", JsonConvert.SerializeObject(result) } };
    });
}
/// <summary>
/// Verifies that excluding partial sequences together with a 1000 bp minimal
/// length filter yields the expected number of ids from the saved NCBI
/// nuccore search results file.
/// </summary>
public void MinLengthPartialFalseInGetIdFromFileTest()
{
    // File.ReadAllText opens and disposes its reader internally;
    // the original leaked an undisposed StreamReader (file handle).
    var textFromFile = File.ReadAllText($"{SystemData.ProjectFolderPathForNcbiHelper}nuccore_result2.txt");

    var result = NcbiHelper.GetIdsFromNcbiSearchResults(textFromFile, false, minLength: 1000);

    int expectedSequencesCount = 415;
    Assert.AreEqual(expectedSequencesCount, result.Length);
}
/// <summary>
/// Verifies that all ids (including partial sequences) are extracted
/// from the saved NCBI nuccore search results file.
/// </summary>
public void GetIDFromFileTest()
{
    // File.ReadAllText opens and disposes its reader internally;
    // the original leaked an undisposed StreamReader (file handle).
    var textFromFile = File.ReadAllText($"{SystemData.ProjectFolderPathForNcbiHelper}nuccore_result.txt");

    var result = NcbiHelper.GetIdsFromNcbiSearchResults(textFromFile, true);

    int expectedSequencesCount = 2111;
    Assert.AreEqual(expectedSequencesCount, result.Length);
}
/// <summary>
/// Verifies that excluding partial sequences removes exactly the known number
/// of partial entries from the saved NCBI nuccore search results file.
/// </summary>
public void IncludePartialInGetIdFromFileTest()
{
    // File.ReadAllText opens and disposes its reader internally;
    // the original leaked an undisposed StreamReader (file handle).
    var textFromFile = File.ReadAllText($"{SystemData.ProjectFolderPathForNcbiHelper}nuccore_result2.txt");

    var result = NcbiHelper.GetIdsFromNcbiSearchResults(textFromFile, false);

    // 823 of the 1447 entries in the fixture are partial sequences.
    int expectedSequencesCount = 1447;
    int partialSequences = 823;
    expectedSequencesCount -= partialSequences;
    Assert.AreEqual(expectedSequencesCount, result.Length);
}
/// <summary>
/// Searches NCBI nuccore with the given query and imports every matching
/// sequence that is not already present in the local database.
/// </summary>
/// <param name="searchQuery">Free-text NCBI search query.</param>
/// <param name="importGenes">If true, sequence features (genes) are imported as well.</param>
/// <param name="importPartial">If true, partial sequences are included in the search.</param>
/// <param name="filterMinLength">Whether to apply <paramref name="minLength"/>.</param>
/// <param name="minLength">Minimal sequence length filter.</param>
/// <param name="filterMaxLength">Whether to apply <paramref name="maxLength"/>.</param>
/// <param name="maxLength">Maximal sequence length filter.</param>
public ActionResult Index(
    string searchQuery,
    bool importGenes,
    bool importPartial,
    bool filterMinLength,
    int minLength,
    bool filterMaxLength,
    int maxLength)
{
    return (CreateTask(() =>
    {
        string searchResults;
        string[] accessions;
        List<NuccoreObject> nuccoreObjects;

        // Build the NCBI search term applying the optional length filters.
        // When only a maximum is requested, the minimum is pinned to 1.
        if (filterMinLength)
        {
            searchResults = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength);
        }
        else
        {
            searchResults = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength: 1, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery);
        }

        nuccoreObjects = NcbiHelper.ExecuteESummaryRequest(searchResults, importPartial);

        // Accession numbers without the version suffix ("NC_000001.11" -> "NC_000001").
        accessions = nuccoreObjects.Select(no => no.AccessionVersion.Split('.')[0]).Distinct().ToArray();
        var importResults = new List<MatterImportResult>(accessions.Length);

        using (var db = new LibiadaWebEntities())
        {
            var matterRepository = new MatterRepository(db);
            var dnaSequenceRepository = new GeneticSequenceRepository(db);

            var (existingAccessions, accessionsToImport) = dnaSequenceRepository.SplitAccessionsIntoExistingAndNotImported(accessions);

            // Report already-imported accessions without contacting NCBI again.
            importResults.AddRange(existingAccessions.ConvertAll(existingAccession => new MatterImportResult
            {
                MatterName = existingAccession,
                Result = "Sequence already exists",
                Status = "Exists"
            }));

            foreach (string accession in accessionsToImport)
            {
                var importResult = new MatterImportResult() { MatterName = accession };
                try
                {
                    ISequence bioSequence = NcbiHelper.DownloadGenBankSequence(accession);
                    GenBankMetadata metadata = NcbiHelper.GetMetadata(bioSequence);
                    importResult.MatterName = metadata.Version.CompoundAccession;

                    Matter matter = matterRepository.CreateMatterFromGenBankMetadata(metadata);

                    importResult.SequenceType = matter.SequenceType.GetDisplayValue();
                    importResult.Group = matter.Group.GetDisplayValue();
                    importResult.MatterName = matter.Name;
                    importResult.AllNames = $"Common name = {metadata.Source.CommonName}, " +
                                            $"Species = {metadata.Source.Organism.Species}, " +
                                            $"Definition = {metadata.Definition}, " +
                                            $"Saved matter name = {importResult.MatterName}";

                    var sequence = new CommonSequence
                    {
                        Matter = matter,
                        Notation = Notation.Nucleotides,
                        RemoteDb = RemoteDb.GenBank,
                        RemoteId = metadata.Version.CompoundAccession
                    };

                    // GenBank marks incomplete sequences with "partial" in the definition line.
                    bool partial = metadata.Definition.ToLower().Contains("partial");
                    dnaSequenceRepository.Create(sequence, bioSequence, partial);

                    (importResult.Result, importResult.Status) = importGenes
                        ? ImportFeatures(metadata, sequence)
                        : ("Successfully imported sequence", "Success");
                }
                catch (Exception exception)
                {
                    importResult.Status = "Error";
                    importResult.Result = $"Error: {exception.Message}";

                    // Flatten the inner exception chain into the result message.
                    while (exception.InnerException != null)
                    {
                        exception = exception.InnerException;
                        importResult.Result += $" {exception.Message}";
                    }

                    // Detach every tracked entity so a failed import does not
                    // poison the EF context for the next accession in the loop.
                    foreach (var dbEntityEntry in db.ChangeTracker.Entries())
                    {
                        if (dbEntityEntry.Entity != null)
                        {
                            dbEntityEntry.State = EntityState.Detached;
                        }
                    }
                }
                finally
                {
                    // Record the outcome whether the import succeeded or failed.
                    importResults.Add(importResult);
                }
            }

            string[] names = importResults.Select(r => r.MatterName).ToArray();

            // removing matters for which adding of sequence failed
            Matter[] orphanMatters = db.Matter
                .Include(m => m.Sequence)
                .Where(m => names.Contains(m.Name) && m.Sequence.Count == 0)
                .ToArray();

            if (orphanMatters.Length > 0)
            {
                db.Matter.RemoveRange(orphanMatters);
                db.SaveChanges();
            }
        }

        var result = new Dictionary<string, object> { { "result", importResults } };
        return new Dictionary<string, string> { { "data", JsonConvert.SerializeObject(result) } };
    }));
}
/// <summary>
/// Calculates the requested full characteristics for sequences supplied either
/// as uploaded files (plain text, image, fasta, or wav audio) or as raw
/// custom strings typed by the user.
/// </summary>
/// <param name="characteristicLinkIds">Ids of the characteristic/link pairs to calculate.</param>
/// <param name="customSequences">Raw sequences (used when <paramref name="localFile"/> is false).</param>
/// <param name="localFile">If true, sequences come from the uploaded files.</param>
/// <param name="fileType">One of "text", "image", "genetic", "wavFile".</param>
/// <param name="toLower">Text files only: lowercase the text before building the chain.</param>
/// <param name="removePunctuation">Text files only: strip non-word, non-space characters.</param>
public ActionResult Index(short[] characteristicLinkIds, string[] customSequences, bool localFile, string fileType, bool toLower, bool removePunctuation)
{
    return (CreateTask(() =>
    {
        int sequencesCount = localFile ? Request.Files.Count : customSequences.Length;
        var sequencesNames = new string[sequencesCount];
        var sequences = new Chain[sequencesCount];

        if (localFile)
        {
            for (int i = 0; i < sequencesCount; i++)
            {
                Stream sequenceStream = FileHelper.GetFileStream(Request.Files[i]);
                sequencesNames[i] = Request.Files[i].FileName;
                switch (fileType)
                {
                    case "text":
                        using (var sr = new StreamReader(sequenceStream))
                        {
                            string stringTextSequence = sr.ReadToEnd();
                            if (toLower)
                            {
                                stringTextSequence = stringTextSequence.ToLower();
                            }

                            if (removePunctuation)
                            {
                                // Keep only word characters and whitespace.
                                stringTextSequence = Regex.Replace(stringTextSequence, @"[^\w\s]", "");
                            }

                            sequences[i] = new Chain(stringTextSequence);
                        }

                        break;
                    case "image":
                        var image = Image.Load(sequenceStream);
                        // No image/matrix transformations; pixels read in line order.
                        var sequence = ImageProcessor.ProcessImage(image, new IImageTransformer[0], new IMatrixTransformer[0], new LineOrderExtractor());
                        // Prepend the null element so the alphabet is complete for Chain.
                        var alphabet = new Alphabet { NullValue.Instance() };
                        var incompleteAlphabet = sequence.Alphabet;
                        for (int j = 0; j < incompleteAlphabet.Cardinality; j++)
                        {
                            alphabet.Add(incompleteAlphabet[j]);
                        }

                        sequences[i] = new Chain(sequence.Building, alphabet);
                        break;
                    case "genetic":
                        ISequence fastaSequence = NcbiHelper.GetFastaSequence(sequenceStream);
                        var stringSequence = fastaSequence.ConvertToString();
                        sequences[i] = new Chain(stringSequence);
                        // For fasta input the sequence id is a better name than the file name.
                        sequencesNames[i] = fastaSequence.ID;
                        break;
                    case "wavFile":
                        // Manual RIFF/WAVE header parse; assumes the canonical chunk
                        // layout (RIFF, fmt, data). Most header fields are read only
                        // to advance the stream position.
                        // NOTE(review): the BinaryReader is never disposed, and only
                        // fmtSize == 18 extra bytes are handled — other fmt sizes
                        // would desynchronize the parse; confirm input constraints.
                        var reader = new BinaryReader(Request.Files[i].InputStream);
                        int chunkID = reader.ReadInt32();
                        int fileSize = reader.ReadInt32();
                        int riffType = reader.ReadInt32();
                        int fmtID = reader.ReadInt32();
                        int fmtSize = reader.ReadInt32();
                        int fmtCode = reader.ReadInt16();
                        int channels = reader.ReadInt16();
                        int sampleRate = reader.ReadInt32();
                        int fmtAvgBPS = reader.ReadInt32();
                        int fmtBlockAlign = reader.ReadInt16();
                        int bitDepth = reader.ReadInt16();
                        if (fmtSize == 18)
                        {
                            // Read any extra values
                            int fmtExtraSize = reader.ReadInt16();
                            reader.ReadBytes(fmtExtraSize);
                        }

                        int dataID = reader.ReadInt32();
                        int dataSize = reader.ReadInt32();
                        byte[] byteArray = reader.ReadBytes(dataSize);

                        // Reinterpret the raw PCM bytes as 16-bit samples.
                        var shortArray = new short[byteArray.Length / 2];
                        Buffer.BlockCopy(byteArray, 0, shortArray, 0, byteArray.Length);

                        //shortArray = Amplitude(shortArray, 20);
                        // Downsample: keep every 50th sample.
                        shortArray = Sampling(shortArray, 50);
                        //shortArray = shortArray.Select(s => (short)(s / 10)).ToArray();
                        sequences[i] = new Chain(shortArray);
                        break;
                    default:
                        throw new ArgumentException("Unknown file type", nameof(fileType));
                }
            }
        }
        else
        {
            for (int i = 0; i < sequencesCount; i++)
            {
                sequences[i] = new Chain(customSequences[i]);
                sequencesNames[i] = $"Custom sequence {i + 1}. Length: {customSequences[i].Length}";
            }
        }

        // Calculate every requested characteristic for every sequence.
        var sequencesCharacteristics = new SequenceCharacteristics[sequences.Length];
        for (int j = 0; j < sequences.Length; j++)
        {
            var characteristics = new double[characteristicLinkIds.Length];
            for (int k = 0; k < characteristicLinkIds.Length; k++)
            {
                Link link = characteristicTypeLinkRepository.GetLinkForCharacteristic(characteristicLinkIds[k]);
                FullCharacteristic characteristic = characteristicTypeLinkRepository.GetCharacteristic(characteristicLinkIds[k]);
                IFullCalculator calculator = FullCalculatorsFactory.CreateCalculator(characteristic);
                characteristics[k] = calculator.Calculate(sequences[j], link);
            }

            sequencesCharacteristics[j] = new SequenceCharacteristics
            {
                MatterName = sequencesNames[j],
                Characteristics = characteristics
            };
        }

        var characteristicNames = new string[characteristicLinkIds.Length];
        var characteristicsList = new SelectListItem[characteristicLinkIds.Length];
        for (int k = 0; k < characteristicLinkIds.Length; k++)
        {
            characteristicNames[k] = characteristicTypeLinkRepository.GetCharacteristicName(characteristicLinkIds[k]);
            characteristicsList[k] = new SelectListItem
            {
                Value = k.ToString(),
                Text = characteristicNames[k],
                Selected = false
            };
        }

        var result = new Dictionary<string, object>
        {
            { "characteristics", sequencesCharacteristics },
            { "characteristicNames", characteristicNames },
            { "characteristicsList", characteristicsList }
        };

        return new Dictionary<string, object> { { "data", JsonConvert.SerializeObject(result) } };
    }));
}
/// <summary>
/// Initializes a new instance of the <see cref="SubsequenceImporter"/> class.
/// Downloads the sequence features from NCBI by the sequence's remote id and
/// delegates to the main constructor with those features and the local sequence id.
/// </summary>
/// <param name="sequence">
/// Dna sequence for which subsequences will be imported.
/// Must have a non-empty <c>RemoteId</c>; there is no local fallback here.
/// </param>
public SubsequenceImporter(DnaSequence sequence) : this(NcbiHelper.GetFeatures(sequence.RemoteId), sequence.Id)
{
}
/// <summary>
/// Transforms each input sequence with the given chain of order
/// transformations (applied <paramref name="iterationsCount"/> times)
/// and calculates the requested full characteristics of the result.
/// </summary>
/// <param name="transformationsSequence">Transformations applied in order on every iteration.</param>
/// <param name="iterationsCount">How many times the whole transformation chain is applied.</param>
/// <param name="characteristicLinkIds">Ids of the characteristic/link pairs to calculate.</param>
/// <param name="customSequences">Raw sequences (used when <paramref name="localFile"/> is false).</param>
/// <param name="localFile">If true, sequences are read from the uploaded fasta files.</param>
/// <param name="file">Uploaded fasta files (used when <paramref name="localFile"/> is true).</param>
public ActionResult Index(
    OrderTransformation[] transformationsSequence,
    int iterationsCount,
    short[] characteristicLinkIds,
    string[] customSequences,
    bool localFile,
    HttpPostedFileBase[] file)
{
    return (CreateTask(() =>
    {
        var characteristicTypeLinkRepository = FullCharacteristicRepository.Instance;
        int sequencesCount = localFile ? Request.Files.Count : customSequences.Length;
        var sequences = new string[sequencesCount];
        var sequencesNames = new string[sequencesCount];

        // Collect raw sequence text and a display name for each input.
        for (int i = 0; i < sequencesCount; i++)
        {
            if (localFile)
            {
                Stream sequenceStream = FileHelper.GetFileStream(file[i]);
                ISequence fastaSequence = NcbiHelper.GetFastaSequence(sequenceStream);
                sequences[i] = fastaSequence.ConvertToString();
                sequencesNames[i] = fastaSequence.ID;
            }
            else
            {
                sequences[i] = customSequences[i];
                sequencesNames[i] = $"Custom sequence {i + 1}. Length: {customSequences[i].Length}";
            }
        }

        var sequencesCharacteristics = new SequenceCharacteristics[sequences.Length];
        for (int j = 0; j < sequences.Length; j++)
        {
            // The transformed sequence does not depend on which characteristic
            // is being calculated, so transform once per sequence. (The original
            // rebuilt and re-transformed the chain inside the k-loop, repeating
            // the same work for every characteristic.)
            var sequence = new Chain(sequences[j]);
            for (int l = 0; l < iterationsCount; l++)
            {
                for (int w = 0; w < transformationsSequence.Length; w++)
                {
                    sequence = transformationsSequence[w] == OrderTransformation.Dissimilar
                                   ? DissimilarChainFactory.Create(sequence)
                                   : HighOrderFactory.Create(sequence, EnumExtensions.GetLink(transformationsSequence[w]));
                }
            }

            var characteristics = new double[characteristicLinkIds.Length];
            for (int k = 0; k < characteristicLinkIds.Length; k++)
            {
                Link link = characteristicTypeLinkRepository.GetLinkForCharacteristic(characteristicLinkIds[k]);
                FullCharacteristic characteristic = characteristicTypeLinkRepository.GetCharacteristic(characteristicLinkIds[k]);
                IFullCalculator calculator = FullCalculatorsFactory.CreateCalculator(characteristic);
                characteristics[k] = calculator.Calculate(sequence, link);
            }

            sequencesCharacteristics[j] = new SequenceCharacteristics
            {
                MatterName = sequencesNames[j],
                Characteristics = characteristics
            };
        }

        string[] characteristicNames = characteristicLinkIds.Select(c => characteristicTypeLinkRepository.GetCharacteristicName(c)).ToArray();
        var characteristicsList = new SelectListItem[characteristicLinkIds.Length];
        for (int i = 0; i < characteristicNames.Length; i++)
        {
            characteristicsList[i] = new SelectListItem
            {
                Value = i.ToString(),
                Text = characteristicNames[i],
                Selected = false
            };
        }

        var transformations = new Dictionary<int, string>();
        for (int i = 0; i < transformationsSequence.Length; i++)
        {
            transformations.Add(i, transformationsSequence[i].GetDisplayValue());
        }

        var result = new Dictionary<string, object>
        {
            { "characteristics", sequencesCharacteristics },
            { "characteristicNames", characteristicNames },
            { "characteristicsList", characteristicsList },
            { "transformationsList", transformations },
            { "iterationsCount", iterationsCount }
        };

        return new Dictionary<string, object> { { "data", JsonConvert.SerializeObject(result) } };
    }));
}
/// <summary>
/// Compares every locally stored nucleotide sequence that has an NCBI
/// accession against NCBI's current records and reports which ones have
/// newer versions, update dates or changed names.
/// </summary>
/// <param name="reinmportSequences">
/// If true the outdated sequences should be reimported; not implemented yet.
/// NOTE(review): the name is misspelled ("reinmport") but it is bound to the
/// request parameter, so renaming must be coordinated with the client side.
/// </param>
public ActionResult Index(bool reinmportSequences)
{
    return (CreateTask(() =>
    {
        if (reinmportSequences)
        {
            throw new NotImplementedException();
        }

        Dictionary<string, AccessionUpdateSearchResult> sequencesData = new Dictionary<string, AccessionUpdateSearchResult>();
        using (var db = new LibiadaWebEntities())
        {
            // NOTE(review): dnaSequenceRepository appears unused in this method.
            var dnaSequenceRepository = new GeneticSequenceRepository(db);
            var sequencesWithAccessions = db.DnaSequence
                .Include(ds => ds.Matter)
                .Where(ds => ds.Notation == Notation.Nucleotides && !string.IsNullOrEmpty(ds.RemoteId))
                .ToArray();

            // Keyed by accession without version ("NC_000001.11" -> "NC_000001").
            // NOTE(review): the Split('?') suggests some RemoteIds carry a query
            // suffix — confirm against the data; Convert.ToByte throws if the
            // version segment is missing or non-numeric.
            sequencesData = sequencesWithAccessions
                .ToDictionary(s => s.RemoteId.Split('.')[0], s => new AccessionUpdateSearchResult()
                {
                    LocalAccession = s.RemoteId,
                    LocalVersion = Convert.ToByte(s.RemoteId.Split('?')[0].Split('.')[1]),
                    Name = s.Matter.Name.Split('|')[0].Trim(),
                    LocalUpdateDate = s.Matter.Modified.ToString(OutputFormats.DateFormat),
                    LocalUpdateDateTime = s.Matter.Modified
                });
        }

        List<NuccoreObject> searchResults = new List<NuccoreObject>();

        // slicing accessions into chunks to prevent "too long request" error
        string[] accessions = sequencesData.Keys.ToArray();
        const int maxChunkSize = 10000;
        for (int i = 0; i < accessions.Length; i += maxChunkSize)
        {
            int actualChunkSize = Math.Min(maxChunkSize, accessions.Length - i);
            var accessionsChunk = new string[actualChunkSize];
            Array.Copy(accessions, i, accessionsChunk, 0, actualChunkSize);

            // EPost stores the id list server-side; ESummary then pages through it.
            (string ncbiWebEnvironment, string queryKey) = NcbiHelper.ExecuteEPostRequest(string.Join(",", accessionsChunk));
            searchResults.AddRange(NcbiHelper.ExecuteESummaryRequest(ncbiWebEnvironment, queryKey, true));
        }

        for (int i = 0; i < searchResults.Count; i++)
        {
            var searchResult = searchResults[i];

            // Strip common GenBank title suffixes before comparing names.
            // NOTE(review): string.TrimEnd has no string overload in the BCL —
            // presumably a project extension method; verify it trims the whole
            // suffix rather than treating the argument as a character set.
            searchResult.Title = searchResult.Title.TrimEnd(".")
                .TrimEnd(", complete genome")
                .TrimEnd(", complete sequence")
                .TrimEnd(", complete CDS")
                .TrimEnd(", complete cds")
                .TrimEnd(", genome");

            // NOTE(review): indexing sequencesData throws KeyNotFoundException
            // if NCBI returns an accession we did not ask about — confirm
            // EPost/ESummary can never widen the id set.
            var newAccession = searchResult.AccessionVersion.Split('.');
            var sequenceData = sequencesData[newAccession[0]];
            sequenceData.RemoteVersion = Convert.ToByte(newAccession[1]);
            sequenceData.RemoteName = searchResult.Title;
            sequenceData.RemoteOrganism = searchResult.Organism;
            sequenceData.RemoteUpdateDate = searchResult.UpdateDate.ToString(OutputFormats.DateFormat);

            // Marked updated when NCBI's copy is at least as recent or carries
            // a higher version number than the local copy.
            sequenceData.Updated = sequenceData.LocalUpdateDateTime <= searchResult.UpdateDate
                                || sequenceData.RemoteVersion > sequenceData.LocalVersion;
            sequenceData.NameUpdated = !(sequenceData.Name.Contains(searchResult.Title)
                                      && sequenceData.Name.Contains(searchResult.Organism));
        }

        // Most out-of-date sequences first.
        var result = new Dictionary<string, object>
        {
            {
                "results", sequencesData.Values
                    .OrderByDescending(r => r.RemoteVersion - r.LocalVersion)
                    .ThenBy(r => r.Updated)
                    .ThenBy(r => r.NameUpdated)
            }
        };

        return new Dictionary<string, string> { { "data", JsonConvert.SerializeObject(result) } };
    }));
}
/// <summary>
/// Creates a sequence of the given nature from an uploaded file or,
/// for genetic sequences, directly from NCBI by remote id.
/// </summary>
/// <param name="commonSequence">Sequence record to create (model-bound).</param>
/// <param name="localFile">If true the data comes from the uploaded file; otherwise genetic data is fetched from NCBI.</param>
/// <param name="language">Literature only: language of the text.</param>
/// <param name="original">Literature only: whether the text is an original rather than a translation.</param>
/// <param name="translator">Literature only: translator of the text.</param>
/// <param name="partial">Genetic only: whether the sequence is partial.</param>
/// <param name="precision">Measurement data only: value precision.</param>
public ActionResult Create(
    [Bind(Include = "Id,Notation,RemoteDb,RemoteId,Description,Matter,MatterId")] CommonSequence commonSequence,
    bool localFile,
    Language? language,
    bool? original,
    Translator? translator,
    bool? partial,
    int? precision)
{
    return (CreateTask(() =>
    {
        // NOTE(review): db is never disposed — it is created outside a using
        // block and the finally only calls the controller's Dispose(true).
        var db = new LibiadaWebEntities();
        try
        {
            if (!ModelState.IsValid)
            {
                throw new Exception("Model state is invalid");
            }

            Stream sequenceStream;
            Nature nature = commonSequence.Notation.GetNature();

            // Genetic sequences can be fetched from NCBI by remote id;
            // everything else must come from the uploaded file.
            if (nature == Nature.Genetic && !localFile)
            {
                sequenceStream = NcbiHelper.GetFastaFileStream(commonSequence.RemoteId);
            }
            else
            {
                sequenceStream = FileHelper.GetFileStream(Request.Files[0]);
            }

            switch (nature)
            {
                case Nature.Genetic:
                    ISequence bioSequence = NcbiHelper.GetFastaSequence(sequenceStream);
                    var dnaSequenceRepository = new GeneticSequenceRepository(db);
                    dnaSequenceRepository.Create(commonSequence, bioSequence, partial ?? false);
                    break;
                case Nature.Music:
                    var musicSequenceRepository = new MusicSequenceRepository(db);
                    musicSequenceRepository.Create(commonSequence, sequenceStream);
                    break;
                case Nature.Literature:
                    var literatureSequenceRepository = new LiteratureSequenceRepository(db);
                    literatureSequenceRepository.Create(commonSequence, sequenceStream, language ?? Language.Russian, original ?? true, translator ?? Translator.NoneOrManual);
                    break;
                case Nature.MeasurementData:
                    var dataSequenceRepository = new DataSequenceRepository(db);
                    dataSequenceRepository.Create(commonSequence, sequenceStream, precision ?? 0);
                    break;
                case Nature.Image:
                    // Images are stored as raw bytes on the matter itself,
                    // not as a sequence.
                    var matterRepository = new MatterRepository(db);
                    int fileSize = Request.Files[0].ContentLength;
                    var file = new byte[fileSize];

                    // NOTE(review): Stream.Read may return fewer bytes than
                    // requested; the return value is ignored here — confirm
                    // upload streams are always fully buffered, or loop the read.
                    Request.Files[0].InputStream.Read(file, 0, fileSize);

                    var matter = new Matter
                    {
                        Nature = Nature.Image,
                        SequenceType = commonSequence.Matter.SequenceType,
                        Name = commonSequence.Matter.Name,
                        Source = file,
                        Group = commonSequence.Matter.Group
                    };
                    matterRepository.SaveToDatabase(matter);
                    break;
                default:
                    throw new InvalidEnumArgumentException(nameof(nature), (int)nature, typeof(Nature));
            }

            // NOTE(review): SingleOrDefault(...).Name throws NullReferenceException
            // when MultisequenceId has no match — a null-conditional access or
            // Single() (which fails with a clearer message) would be safer.
            string multisequenceName = db.Multisequence.SingleOrDefault(ms => ms.Id == commonSequence.Matter.MultisequenceId).Name;
            var result = new ImportResult(commonSequence, language, original, translator, partial, precision, multisequenceName);

            return new Dictionary<string, string> { { "data", JsonConvert.SerializeObject(result) } };
        }
        catch (Exception)
        {
            // Roll back: remove the matter created for this sequence if the
            // sequence itself was never attached to it.
            long matterId = commonSequence.MatterId;
            if (matterId != 0)
            {
                List<Matter> orphanMatter = db.Matter
                    .Include(m => m.Sequence)
                    .Where(m => m.Id == matterId && m.Sequence.Count == 0)
                    .ToList();

                if (orphanMatter.Count > 0)
                {
                    db.Matter.Remove(orphanMatter[0]);
                    db.SaveChanges();
                }
            }

            throw;
        }
        finally
        {
            Dispose(true);
        }
    }));
}
/// <summary>
/// Previews an NCBI nuccore search: reports which matching sequences are new,
/// which already exist locally, and (when partial sequences are excluded)
/// which ones were filtered out.
/// </summary>
/// <param name="searchQuery">Free-text NCBI search query.</param>
/// <param name="importPartial">If true, partial sequences are included in the results.</param>
/// <param name="filterMinLength">Whether to apply <paramref name="minLength"/>.</param>
/// <param name="minLength">Minimal sequence length filter.</param>
/// <param name="filterMaxLength">Whether to apply <paramref name="maxLength"/>.</param>
/// <param name="maxLength">Maximal sequence length filter.</param>
public ActionResult Index(
    string searchQuery,
    bool importPartial,
    bool filterMinLength,
    int minLength,
    bool filterMaxLength,
    int maxLength)
{
    return CreateTask(() =>
    {
        // Build the NCBI search term with the requested length filters applied.
        if (filterMinLength)
        {
            searchQuery = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery, minLength);
        }
        else
        {
            searchQuery = filterMaxLength
                ? NcbiHelper.FormatNcbiSearchTerm(searchQuery, maxLength: maxLength)
                : NcbiHelper.FormatNcbiSearchTerm(searchQuery);
        }

        List<NuccoreObject> searchResults = NcbiHelper.ExecuteESummaryRequest(searchQuery, importPartial);
        List<NuccoreObject> filteredOutResults = searchResults;
        string[] accessions;

        if (importPartial)
        {
            accessions = searchResults.Select(no => no.AccessionVersion.Split('.')[0]).Distinct().ToArray();
        }
        else
        {
            // Re-run the search including partials so we can show the user
            // exactly which sequences the partial filter dropped.
            List<NuccoreObject> unfilteredResults = NcbiHelper.ExecuteESummaryRequest(searchQuery, true);
            filteredOutResults = unfilteredResults.Except(searchResults).ToList();
            accessions = unfilteredResults.Select(no => no.AccessionVersion.Split('.')[0]).Distinct().ToArray();
        }

        var results = new List<MatterImportResult>(accessions.Length);
        string[] existingAccessions;
        using (var db = new LibiadaWebEntities())
        {
            var dnaSequenceRepository = new GeneticSequenceRepository(db);
            (existingAccessions, _) = dnaSequenceRepository.SplitAccessionsIntoExistingAndNotImported(accessions);
        }

        // New sequences: everything found that is not already in the database.
        searchResults = searchResults
            .Where(sr => !existingAccessions.Contains(sr.AccessionVersion.Split('.')[0]))
            .ToList();
        results.AddRange(searchResults.Select(sr => new MatterImportResult()
        {
            MatterName = $"{sr.Title} | {sr.AccessionVersion}",
            Result = "Found new sequence",
            Status = "Success"
        }));

        // Already-imported sequences.
        results.AddRange(existingAccessions.Select(existingAccession => new MatterImportResult
        {
            MatterName = existingAccession,
            Result = "Sequence already exists",
            Status = "Exists"
        }));

        // Sequences dropped by the partial-sequence filter (excluding those
        // that are already imported).
        if (!importPartial)
        {
            foreach (var filteredOut in filteredOutResults)
            {
                if (existingAccessions.Contains(filteredOut.AccessionVersion.Split('.')[0]))
                {
                    continue;
                }

                results.Add(new MatterImportResult()
                {
                    MatterName = $"{filteredOut.Title} | {filteredOut.AccessionVersion}",
                    Result = "Filtered out",
                    Status = "Error"
                });
            }
        }

        accessions = searchResults.Select(sr => sr.AccessionVersion).ToArray();

        var result = new Dictionary<string, object>
        {
            { "result", results },
            { "accessions", accessions }
        };

        return new Dictionary<string, string> { { "data", JsonConvert.SerializeObject(result) } };
    });
}