/// <summary>
/// Requests the tools list from the endpoint built from
/// <c>Repo.URI + _repositoriesEndpoint</c> and deserializes the response body.
/// </summary>
/// <returns>
/// The deserialized items, after <c>SetStagingArea(SessionTempPath)</c> has been
/// called on each of them; or <c>null</c> when the response status is not a
/// success code or deserialization did not produce a list.
/// </returns>
private async Task<List<DeserializedInfo>> GetToolsAsync()
{
    Logger.LogDebug("Getting tools list from ToolShed.");

    using var client = new HttpClient();

    // The response is disposable; release it (and its content) once the
    // body has been read instead of leaving it to the finalizer.
    using HttpResponseMessage response =
        await client.GetAsync(new Uri(Repo.URI + _repositoriesEndpoint)).ConfigureAwait(false);

    if (!response.IsSuccessStatusCode)
    {
        // TODO: replace with an exception.
        Logger.LogDebug($"Failed getting tools list from ToolShed: {(int)response.StatusCode} {response.ReasonPhrase}.");
        return null;
    }

    string content = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

    Logger.LogDebug("Received tools from ToolShed, deserializing them.");
    DeserializedInfo.TryDeserialize(
        content,
        ToolJsonSerializerSettings,
        ToolRepoAssoJsonSerializerSettings,
        out List<DeserializedInfo> deserializedInfos);

    // Without this guard a null list would throw a NullReferenceException
    // in the loop below; report it the same way as a failed request.
    if (deserializedInfos == null)
    {
        Logger.LogDebug("Cannot deserialize the tools list received from ToolShed.");
        return null;
    }

    foreach (var info in deserializedInfos)
    {
        info.SetStagingArea(SessionTempPath);
    }

    return deserializedInfos;
}
/// <summary>
/// Registers the tool carried by <paramref name="info"/> in <c>Tools</c>,
/// attaches its category and publication associations, and registers the
/// tool-repository association.
/// </summary>
/// <param name="info">The deserialized tool information; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> when <c>TryAddToolRepoAssociations</c> accepted the association;
/// <c>false</c> when required information is missing (no association, no tool,
/// no tool name, or no date added to repository) or when
/// <c>TryAddToolRepoAssociations</c> returned <c>false</c>.
/// </returns>
protected bool TryAddEntities(DeserializedInfo info)
{
    // Checks if the association and the tool contain the required
    // information. The association itself may be missing, so it is
    // checked before its tool is dereferenced.
    var toolRepoAsso = info?.ToolRepoAssociation;
    if (toolRepoAsso?.Tool == null)
    {
        return false;
    }

    if (toolRepoAsso.Tool.Name == null)
    {
        Logger.LogDebug("Skipping tool because missing name.");
        return false;
    }

    if (toolRepoAsso.DateAddedToRepository == null)
    {
        Logger.LogDebug($"Skipping tool {toolRepoAsso.Tool.Name} because the data it was added to repository is not set.");
        return false;
    }

    // The trimmed name is written back to the tool and used as the key in Tools.
    var toolName = toolRepoAsso.Tool.Name = toolRepoAsso.Tool.Name.Trim();

    // If a tool with this name is already registered, reuse the registered
    // instance instead of the freshly deserialized one.
    if (!Tools.TryAdd(toolName, toolRepoAsso.Tool))
    {
        toolRepoAsso.Tool = Tools[toolName];
    }

    // TODO: there could be a better way of associating categories with
    // repository if the tool association was successful than this method.
    var categoryRepoAssoToRegister = new List<CategoryRepoAssociation>();
    foreach (var association in info.CategoryRepoAssociations)
    {
        var asso = EnsureEntity(association);
        toolRepoAsso.Tool.CategoryAssociations.Add(
            new ToolCategoryAssociation()
            {
                Category = asso.Category,
                Tool = toolRepoAsso.Tool
            });
        categoryRepoAssoToRegister.Add(asso);
    }

    AddToolPubAssociations(toolRepoAsso.Tool, info.ToolPubAssociations);

    if (!TryAddToolRepoAssociations(info))
    {
        return false;
    }

    // Category-repository associations are registered with the repository
    // only once the tool-repository association has been accepted.
    foreach (var association in categoryRepoAssoToRegister)
    {
        Repo.CategoryAssociations.Add(association);
    }

    return true;
}
/// <summary>
/// Opens the zip archive at <paramref name="archiveFileName"/>, and for every
/// entry whose name ends with <c>meta.yaml</c> extracts it to a temporary file,
/// parses it as YAML, deserializes tool information from it and passes it to
/// <c>TryAddEntities</c>. The archive file is deleted when traversal ends,
/// whether or not it succeeded.
/// </summary>
/// <param name="archiveFileName">Path of the zip archive to traverse.</param>
private void TraverseArchive(string archiveFileName)
{
    try
    {
        using ZipArchive archive = ZipFile.OpenRead(archiveFileName);
        foreach (ZipArchiveEntry entry in archive.Entries)
        {
            if (!entry.FullName.EndsWith("meta.yaml", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Entries are extracted under a random name, so same-named
            // entries from different folders of the archive do not collide.
            string extractedFileName = SessionTempPath + Utilities.GetRandomString() + ".yaml";
            try
            {
                entry.ExtractToFile(extractedFileName);
                using var reader = new StreamReader(extractedFileName);
                var yaml = new YamlStream();
                yaml.Load(reader);

                if (!DeserializedInfo.TryDeserialize(yaml, out DeserializedInfo deserializedInfo))
                {
                    Logger.LogInformation($"Cannot deserialize tool info from {entry.FullName}.");
                    continue;
                }

                // Single lookup instead of ContainsKey followed by the indexer.
                var toolName = deserializedInfo.ToolRepoAssociation?.Tool?.Name;
                if (toolName != null && _addedDates.TryGetValue(toolName, out var dateAdded))
                {
                    deserializedInfo.ToolRepoAssociation.DateAddedToRepository = dateAdded;
                }

                if (!TryAddEntities(deserializedInfo))
                {
                    // TODO: log why this tool will not be added to db.
                    Logger.LogDebug($"Tool info from {entry.FullName} was not added.");
                }
            }
            catch (Exception e) when (e is YamlDotNet.Core.YamlException ||
                                      e is YamlDotNet.Core.SyntaxErrorException ||
                                      e is YamlDotNet.Core.SemanticErrorException)
            {
                Logger.LogDebug($"Cannot parse the YAML file {entry.FullName}: {e.Message}");
            }
            catch (IOException e)
            {
                // A failure to extract or read one entry should not abort
                // the traversal of the remaining entries.
                Logger.LogDebug($"Cannot extract or read the file {entry.FullName}: {e.Message}");
            }
            finally
            {
                File.Delete(extractedFileName);
            }
        }
    }
    catch (Exception e)
    {
        Logger.LogError($"Error occurred traversing Bioconda repository: {e.Message}");
    }
    finally
    {
        File.Delete(archiveFileName);
    }
}
/// <summary>
/// Recursively deletes the staging area directory of <paramref name="info"/>.
/// Does nothing when <paramref name="info"/> is <c>null</c> or when the
/// staging area directory does not exist.
/// </summary>
/// <param name="info">The item whose temporary files are removed; may be <c>null</c>.</param>
private void Cleanup(DeserializedInfo info)
{
    if (info == null)
    {
        return;
    }

    // Used for log messages only; read null-safely so that building a
    // log message cannot prevent the cleanup from running.
    var toolName = info.ToolRepoAssociation?.Tool?.Name;

    // Directory.Delete throws if the directory is missing; there is
    // nothing to clean up in that case.
    if (!Directory.Exists(info.StagingArea))
    {
        Logger.LogDebug($"No temporary files to delete for tool {toolName}.");
        return;
    }

    Logger.LogDebug($"Deleting temporary files of tool {toolName}.");
    Directory.Delete(info.StagingArea, true);
    Logger.LogDebug($"Deleted temporary files of tool {toolName}.");
}
/// <summary>
/// Opens the zip archive at <paramref name="archiveFileName"/>, and for every
/// entry whose name ends with <c>.json</c> but not with <c>oeb.json</c>
/// extracts it to a temporary file, deserializes tool information from its
/// content and passes it to <c>TryAddEntities</c>. The archive file is deleted
/// when traversal ends, whether or not it succeeded.
/// </summary>
/// <param name="archiveFileName">Path of the zip archive to traverse.</param>
private void TraverseArchive(string archiveFileName)
{
    try
    {
        using ZipArchive archive = ZipFile.OpenRead(archiveFileName);
        foreach (ZipArchiveEntry entry in archive.Entries)
        {
            if (!entry.FullName.EndsWith(".json", StringComparison.OrdinalIgnoreCase) ||
                entry.FullName.EndsWith("oeb.json", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Entries are extracted under a random name, so same-named
            // entries from different folders of the archive do not collide.
            string extractedFileName = SessionTempPath + Utilities.GetRandomString() + ".json";
            try
            {
                entry.ExtractToFile(extractedFileName);
                using var reader = new StreamReader(extractedFileName);
                if (!DeserializedInfo.TryDeserialize(
                    reader.ReadToEnd(),
                    ToolJsonSerializerSettings,
                    ToolRepoAssoJsonSerializerSettings,
                    PublicationSerializerSettings,
                    CategorySerializerSettings,
                    out DeserializedInfo deserializedInfo))
                {
                    Logger.LogInformation($"Cannot deserialize tool info from {entry.FullName}.");
                    continue;
                }

                if (!TryAddEntities(deserializedInfo))
                {
                    // TODO: log why this tool will not be added to db.
                    Logger.LogDebug($"Tool info from {entry.FullName} was not added.");
                }
            }
            catch (IOException e)
            {
                // One unreadable entry is reported and skipped; the
                // remaining entries are still processed.
                Logger.LogDebug($"Cannot extract or read the file {entry.FullName}: {e.Message}");
            }
            finally
            {
                File.Delete(extractedFileName);
            }
        }
    }
    catch (Exception e)
    {
        // TODO: if this exception has occurred, the caller job's status should be set to failed.
        Logger.LogError($"Error occurred traversing archive {archiveFileName}: {e.Message}");
    }
    finally
    {
        File.Delete(archiveFileName);
    }
}
/// <summary>
/// Extracts every entry whose name ends with <c>.xml</c> from the archive at
/// <c>info.ArchiveFilename</c> and records the extracted file paths in
/// <c>info.XMLFiles</c>.
/// </summary>
/// <param name="info">The item whose archive is processed; may be <c>null</c>.</param>
/// <returns>
/// <paramref name="info"/> after extraction; <c>null</c> when
/// <paramref name="info"/> is <c>null</c> or an
/// <see cref="InvalidDataException"/> is raised while reading the archive.
/// </returns>
private DeserializedInfo WrapperExtractor(DeserializedInfo info)
{
    if (info == null)
    {
        return null;
    }

    var toolName = info.ToolRepoAssociation.Tool.Name;

    try
    {
        Logger.LogDebug($"Extracting XML files from tool {toolName} archive.");

        using ZipArchive zip = ZipFile.Open(info.ArchiveFilename, ZipArchiveMode.Read);
        foreach (ZipArchiveEntry zipEntry in zip.Entries)
        {
            if (!zipEntry.FullName.EndsWith(".xml", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Files that live in different folders of the archive may share
            // a name, yet they are all extracted into one folder. A random
            // suffix on the file name keeps them from colliding.
            var folder = info.ArchiveExtractionPath;
            var baseName = Path.GetFileNameWithoutExtension(zipEntry.FullName);
            var suffix = Utilities.GetRandomString(8);
            var targetPath = folder + baseName + suffix;

            // Each file is extracted in its own try-catch so that a
            // broken/invalid file does not stop the extraction of the
            // other, possibly valid, files.
            try
            {
                Logger.LogInformation($"Extracting XML file {zipEntry.FullName} of tool {toolName}.");
                zipEntry.ExtractToFile(targetPath);
                Logger.LogInformation($"Successfully extracted XML file {zipEntry.FullName} of tool {toolName}.");
                info.XMLFiles.Add(targetPath);
            }
            catch (InvalidDataException ex)
            {
                // Raised for this single entry; it is reported and skipped.
                Logger.LogDebug($"Failed extracting XML file {zipEntry.FullName} of tool {toolName}: {ex.Message}");
            }
        }

        Logger.LogDebug($"Extracted {info.XMLFiles.Count} XML file(s) for tool {toolName}.");
        return info;
    }
    catch (InvalidDataException ex)
    {
        // This exception is thrown when the Zip archive cannot be read.
        Logger.LogDebug($"Failed extracting XML files from tool {toolName} archive: {ex.Message}");
        return null;
    }
}
/// <summary>
/// Adds the tool-repository association of <paramref name="info"/> to
/// <c>ToolRepoAssociationsDict</c>, keyed by
/// <c>FormatToolRepoAssociationName</c> of its tool, and on success also to
/// <c>Repo.ToolAssociations</c>.
/// </summary>
/// <param name="info">The item carrying the association to register.</param>
/// <returns>
/// <c>true</c> if the association was added; <c>false</c> if the dictionary
/// already had an entry under the same key.
/// </returns>
private bool TryAddToolRepoAssociations(DeserializedInfo info)
{
    var key = FormatToolRepoAssociationName(info.ToolRepoAssociation.Tool);
    var added = ToolRepoAssociationsDict.TryAdd(key, info.ToolRepoAssociation);

    if (!added)
    {
        Logger.LogDebug($"Association between Tool {info.ToolRepoAssociation.Tool.Name} and Repository {Repo.Name} already exists.");
        return false;
    }

    Repo.ToolAssociations.Add(info.ToolRepoAssociation);
    return true;
}
/// <summary>
/// Downloads the <c>tip.zip</c> archive of the tool described by
/// <paramref name="info"/> to <c>info.ArchiveFilename</c>.
/// </summary>
/// <param name="info">The item whose archive is downloaded; may be <c>null</c>.</param>
/// <returns>
/// <paramref name="info"/> when the download succeeds; <c>null</c> when
/// <paramref name="info"/> is <c>null</c> or a <see cref="WebException"/>
/// is raised while downloading.
/// </returns>
private DeserializedInfo Downloader(DeserializedInfo info)
{
    // Same null contract as the other per-tool steps (extraction,
    // publication parsing, cleanup): a null item is passed through as null
    // instead of failing with a NullReferenceException.
    if (info == null)
    {
        return null;
    }

    try
    {
        Logger.LogDebug($"Downloading archive of {info.ToolRepoAssociation.Tool.Name}.");

        // Note: a dedicated WebClient is created per call rather than
        // sharing one instance, because a single instance cannot
        // download multiple files concurrently.
        using var client = new WebClient();
        client.DownloadFile(
            address: new Uri(
                $"https://toolshed.g2.bx.psu.edu/repos/" +
                $"{info.ToolRepoAssociation.Owner}/{info.ToolRepoAssociation.Tool.Name}/" +
                $"archive/tip.zip"),
            fileName: info.ArchiveFilename);

        Logger.LogDebug($"Successfully downloaded archive of {info.ToolRepoAssociation.Tool.Name}.");
        return info;
    }
    catch (WebException e)
    {
        Logger.LogDebug($"Failed downloading archive of {info.ToolRepoAssociation.Tool.Name}: {e.Message}");
        return null;
    }
}
/// <summary>
/// Reads each file in <c>info.XMLFiles</c>, collects publications from the
/// typed descendants of its <c>citations</c> elements (types <c>DOI</c> and
/// <c>BIBTEX</c>, compared case-insensitively after trimming), stores them in
/// <c>info.ToolPubAssociations</c> and calls <c>TryAddEntities</c> once per file.
/// </summary>
/// <param name="info">The item whose XML files are parsed; may be <c>null</c>.</param>
/// <returns>
/// <paramref name="info"/> after all files were processed; <c>null</c> when
/// <paramref name="info"/> is <c>null</c> or as soon as a file raises a
/// <see cref="System.Xml.XmlException"/>.
/// </returns>
private DeserializedInfo ExtractPublications(DeserializedInfo info)
{
    if (info == null)
    {
        return null;
    }

    foreach (var xmlPath in info.XMLFiles)
    {
        var xmlName = Path.GetFileNameWithoutExtension(xmlPath);

        Logger.LogDebug($"Extracting publication info from XML file {xmlName} of tool {info.ToolRepoAssociation.Tool.Name}.");

        try
        {
            XElement root = XElement.Load(xmlPath);
            var collected = new List<ToolPublicationAssociation>();

            foreach (var citation in root.Elements("citations").Descendants())
            {
                var typeAttribute = citation.Attribute("type");
                if (typeAttribute == null)
                {
                    continue;
                }

                var citationType = typeAttribute.Value.Trim().ToUpperInvariant();
                if (citationType == "DOI")
                {
                    // Note carried over from the original author: some tools
                    // have one BibItem holding only a DOI and another holding
                    // the publication info, whereas there should be a single
                    // BibItem per publication with both; for such tools only
                    // the DOI-bearing one is meant to be considered.
                    collected.Add(
                        new ToolPublicationAssociation()
                        {
                            Publication = new Publication() { DOI = citation.Value }
                        });
                }
                else if (citationType == "BIBTEX")
                {
                    try
                    {
                        if (TryParseBibitem(citation.Value, out Publication parsed))
                        {
                            collected.Add(
                                new ToolPublicationAssociation() { Publication = parsed });
                        }
                    }
                    catch (ArgumentException ex)
                    {
                        Logger.LogDebug($"Error extracting publication from XML file of tool {info.ToolRepoAssociation.Tool.Name}:{ex.Message}");
                    }
                }
            }

            Logger.LogDebug($"Successfully extract publication info from XML file {xmlName} of tool {info.ToolRepoAssociation.Tool.Name}.");

            info.ToolPubAssociations = collected;
            TryAddEntities(info);
        }
        catch (System.Xml.XmlException ex)
        {
            // This exception may happen if the XML file has multiple roots.
            Logger.LogDebug($"Failed extracting publication info from XML file {xmlName} of tool {info.ToolRepoAssociation.Tool.Name}: {ex.Message}");
            return null;
        }
    }

    return info;
}