Exemple #1
0
        /// <summary>
        /// Requests the list of tools from the repository (the address is
        /// <c>Repo.URI</c> followed by <c>_repositoriesEndpoint</c>),
        /// deserializes the response body, and sets the staging area of every
        /// deserialized item to <c>SessionTempPath</c>.
        /// </summary>
        /// <returns>
        /// The deserialized tools; or <c>null</c> if the response status does
        /// not indicate success or the response body cannot be deserialized.
        /// </returns>
        private async Task <List <DeserializedInfo> > GetToolsAsync()
        {
            Logger.LogDebug("Getting tools list from ToolShed.");
            using var client = new HttpClient();

            // The response is disposable; a using declaration releases
            // it (and its content stream) when the method exits.
            using HttpResponseMessage response = await client.GetAsync(new Uri(Repo.URI + _repositoriesEndpoint)).ConfigureAwait(false);

            if (!response.IsSuccessStatusCode)
            {
                // TODO: replace with an exception.
                return null;
            }

            string content = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

            Logger.LogDebug("Received tools from ToolShed, deserializing them.");

            // Do not ignore the outcome of the Try-method: on failure the
            // out argument may be null, and iterating it would throw.
            if (!DeserializedInfo.TryDeserialize(
                    content,
                    ToolJsonSerializerSettings,
                    ToolRepoAssoJsonSerializerSettings,
                    out List <DeserializedInfo> deserializedInfos) ||
                deserializedInfos == null)
            {
                Logger.LogDebug("Failed deserializing tools received from ToolShed.");
                return null;
            }

            foreach (var info in deserializedInfos)
            {
                info.SetStagingArea(SessionTempPath);
            }

            return deserializedInfos;
        }
Exemple #2
0
        /// <summary>
        /// Registers the tool described by <paramref name="info"/> together
        /// with its category, publication, and repository associations.
        /// </summary>
        /// <param name="info">
        /// The deserialized tool information. The name of its tool is
        /// trimmed in place, and its tool is replaced by the instance already
        /// registered under the same name, if any.
        /// </param>
        /// <returns>
        /// <c>true</c> if the tool-repository association is added;
        /// <c>false</c> if <paramref name="info"/>, its tool-repository
        /// association, its tool, the tool name, or the date it was added to
        /// the repository is missing, or if
        /// <c>TryAddToolRepoAssociations</c> returns <c>false</c>.
        /// </returns>
        protected bool TryAddEntities(DeserializedInfo info)
        {
            // Checks if the association and the tool contain the required
            // information. The association itself may be missing too, hence
            // the null-conditional access on every step of the chain.
            if (info?.ToolRepoAssociation?.Tool == null)
            {
                return false;
            }

            if (info.ToolRepoAssociation.Tool.Name == null)
            {
                Logger.LogDebug("Skipping tool because missing name.");
                return false;
            }

            if (info.ToolRepoAssociation.DateAddedToRepository == null)
            {
                Logger.LogDebug($"Skipping tool {info.ToolRepoAssociation.Tool.Name} because the data it was added to repository is not set.");
                return false;
            }

            var toolName = info.ToolRepoAssociation.Tool.Name = info.ToolRepoAssociation.Tool.Name.Trim();

            // If a tool with this name is already registered, reuse the
            // registered instance instead of the newly deserialized one.
            if (!Tools.TryAdd(toolName, info.ToolRepoAssociation.Tool))
            {
                info.ToolRepoAssociation.Tool = Tools[toolName];
            }

            // TODO: there could be a better way of associating categories with
            // repository if the tool association was successful than this method.
            var categoryRepoAssoToRegister = new List <CategoryRepoAssociation>();

            foreach (var association in info.CategoryRepoAssociations)
            {
                var asso = EnsureEntity(association);
                info.ToolRepoAssociation.Tool.CategoryAssociations
                .Add(new ToolCategoryAssociation()
                {
                    Category = asso.Category,
                    Tool     = info.ToolRepoAssociation.Tool
                });

                categoryRepoAssoToRegister.Add(asso);
            }

            AddToolPubAssociations(info.ToolRepoAssociation.Tool, info.ToolPubAssociations);

            if (!TryAddToolRepoAssociations(info))
            {
                return false;
            }

            // Category-repository associations are registered with the
            // repository only when the tool-repository association is added.
            foreach (var association in categoryRepoAssoToRegister)
            {
                Repo.CategoryAssociations.Add(association);
            }

            return true;
        }
Exemple #3
0
        /// <summary>
        /// Iterates over the entries of the given zip archive and, for every
        /// entry whose name ends with <c>meta.yaml</c>, extracts it to a
        /// temporary file, deserializes it, sets the date the tool was added
        /// to the repository when one is known in <c>_addedDates</c>, and
        /// registers the result through <c>TryAddEntities</c>.
        /// The temporary file and the archive itself are deleted afterwards.
        /// </summary>
        /// <param name="archiveFileName">Path of the zip archive to traverse.</param>
        private void TraverseArchive(string archiveFileName)
        {
            try
            {
                using ZipArchive archive = ZipFile.OpenRead(archiveFileName);
                foreach (ZipArchiveEntry entry in archive.Entries)
                {
                    if (!entry.FullName.EndsWith("meta.yaml", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    string extractedFileName = SessionTempPath + Utilities.GetRandomString() + ".yaml";
                    try
                    {
                        entry.ExtractToFile(extractedFileName);
                        using var reader = new StreamReader(extractedFileName);
                        var yaml = new YamlStream();
                        yaml.Load(reader);
                        if (!DeserializedInfo.TryDeserialize(yaml, out DeserializedInfo deserializedInfo))
                        {
                            Logger.LogInformation($"Cannot deserialize tool info from {entry.FullName}.");
                            continue;
                        }

                        // A single TryGetValue replaces the ContainsKey + indexer
                        // pair, so the dictionary is looked up only once.
                        if (deserializedInfo.ToolRepoAssociation?.Tool?.Name is string toolName &&
                            _addedDates.TryGetValue(toolName, out var addedDate))
                        {
                            deserializedInfo.ToolRepoAssociation.DateAddedToRepository = addedDate;
                        }

                        if (!TryAddEntities(deserializedInfo))
                        {
                            Logger.LogDebug($"Tool info from {entry.FullName} was not added.");
                        }
                    }
                    catch (Exception e) when(e is YamlDotNet.Core.YamlException ||
                                             e is YamlDotNet.Core.SyntaxErrorException ||
                                             e is YamlDotNet.Core.SemanticErrorException)
                    {
                        // A YAML file that cannot be parsed is skipped;
                        // traversal continues with the next entry.
                        Logger.LogDebug($"Cannot parse the YAML file {entry.FullName}: {e.Message}");
                    }
                    finally
                    {
                        File.Delete(extractedFileName);
                    }
                }
            }
            catch (Exception e)
            {
                // Any other failure aborts the traversal of this archive.
                Logger.LogError($"Error occurred traversing Bioconda repository: {e.Message}");
            }
            finally
            {
                File.Delete(archiveFileName);
            }
        }
Exemple #4
0
 /// <summary>
 /// Deletes the staging area directory of the given tool, including
 /// all of its content.
 /// </summary>
 /// <param name="info">
 /// The tool information whose staging area is deleted; nothing is
 /// done when it is <c>null</c>.
 /// </param>
 private void Cleanup(DeserializedInfo info)
 {
     if (info == null)
     {
         return;
     }

     // The name is resolved null-safely so that composing a log
     // message can never prevent the temporary files from being deleted.
     var toolName = info.ToolRepoAssociation?.Tool?.Name;

     // Directory.Delete throws if the directory does not exist;
     // there is nothing to clean up in that case.
     if (!Directory.Exists(info.StagingArea))
     {
         Logger.LogDebug($"No temporary files to delete for tool {toolName}.");
         return;
     }

     Logger.LogDebug($"Deleting temporary files of tool {toolName}.");
     Directory.Delete(info.StagingArea, true);
     Logger.LogDebug($"Deleted temporary files of tool {toolName}.");
 }
Exemple #5
0
        /// <summary>
        /// Iterates over the entries of the given zip archive and, for every
        /// entry whose name ends with <c>.json</c> but not with
        /// <c>oeb.json</c>, extracts it to a temporary file, deserializes it,
        /// and registers the result through <c>TryAddEntities</c>.
        /// The temporary file and the archive itself are deleted afterwards.
        /// </summary>
        /// <param name="archiveFileName">Path of the zip archive to traverse.</param>
        private void TraverseArchive(string archiveFileName)
        {
            try
            {
                using ZipArchive archive = ZipFile.OpenRead(archiveFileName);
                foreach (ZipArchiveEntry entry in archive.Entries)
                {
                    if (!entry.FullName.EndsWith(".json", StringComparison.OrdinalIgnoreCase) ||
                        entry.FullName.EndsWith("oeb.json", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    string extractedFileName = SessionTempPath + Utilities.GetRandomString() + ".json";
                    try
                    {
                        entry.ExtractToFile(extractedFileName);
                        using var reader = new StreamReader(extractedFileName);
                        if (!DeserializedInfo.TryDeserialize(
                                reader.ReadToEnd(),
                                ToolJsonSerializerSettings,
                                ToolRepoAssoJsonSerializerSettings,
                                PublicationSerializerSettings,
                                CategorySerializerSettings,
                                out DeserializedInfo deserializedInfo))
                        {
                            Logger.LogInformation($"Cannot deserialize tool info from {entry.FullName}.");
                            continue;
                        }

                        if (!TryAddEntities(deserializedInfo))
                        {
                            Logger.LogDebug($"Tool info from {entry.FullName} was not added.");
                        }
                    }
                    catch (IOException e)
                    {
                        // An entry that cannot be extracted or read is skipped;
                        // traversal continues with the next entry.
                        Logger.LogDebug($"Cannot extract or read {entry.FullName}: {e.Message}");
                    }
                    finally
                    {
                        File.Delete(extractedFileName);
                    }
                }
            }
            catch (Exception e)
            {
                // Any other failure aborts the traversal of this archive.
                // TODO: if this exception has occurred, the caller job's status should be set to failed.
                Logger.LogError($"Error occurred traversing archive {archiveFileName}: {e.Message}");
            }
            finally
            {
                File.Delete(archiveFileName);
            }
        }
Exemple #6
0
        /// <summary>
        /// Extracts every entry of the tool's archive whose name ends with
        /// <c>.xml</c> and records the path of each extracted file in
        /// <c>info.XMLFiles</c>.
        /// </summary>
        /// <param name="info">The tool whose archive is read.</param>
        /// <returns>
        /// <paramref name="info"/>; or <c>null</c> if <paramref name="info"/>
        /// is <c>null</c> or reading the archive throws an
        /// <see cref="InvalidDataException"/>.
        /// </returns>
        private DeserializedInfo WrapperExtractor(DeserializedInfo info)
        {
            if (info == null)
            {
                return null;
            }

            var toolName = info.ToolRepoAssociation.Tool.Name;

            try
            {
                Logger.LogDebug($"Extracting XML files from tool {toolName} archive.");
                using ZipArchive zip = ZipFile.Open(info.ArchiveFilename, ZipArchiveMode.Read);
                foreach (ZipArchiveEntry zipEntry in zip.Entries)
                {
                    var entryName = zipEntry.FullName;
                    if (!entryName.EndsWith(".xml", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    // Entries that live in different folders of the archive are
                    // all written to one common folder, so a random string is
                    // appended to each filename to keep same-named files apart.
                    var targetPath = string.Concat(
                        info.ArchiveExtractionPath,
                        Path.GetFileNameWithoutExtension(entryName),
                        Utilities.GetRandomString(8));

                    // Each file is extracted in its own try-catch block, so a
                    // broken/invalid entry does not stop the extraction of the
                    // remaining entries, which may be valid.
                    try
                    {
                        Logger.LogInformation($"Extracting XML file {entryName} of tool {toolName}.");
                        zipEntry.ExtractToFile(targetPath);
                        Logger.LogInformation($"Successfully extracted XML file {entryName} of tool {toolName}.");
                        info.XMLFiles.Add(targetPath);
                    }
                    catch (InvalidDataException entryError)
                    {
                        // This exception is thrown when the Zip archive cannot be read.
                        Logger.LogDebug($"Failed extracting XML file {entryName} of tool {toolName}: {entryError.Message}");
                    }
                }

                Logger.LogDebug($"Extracted {info.XMLFiles.Count} XML file(s) for tool {toolName}.");
                return info;
            }
            catch (InvalidDataException archiveError)
            {
                // This exception is thrown when the Zip archive cannot be read.
                Logger.LogDebug($"Failed extracting XML files from tool {toolName} archive: {archiveError.Message}");
                return null;
            }
        }
Exemple #7
0
 /// <summary>
 /// Adds the tool-repository association of <paramref name="info"/> to
 /// <c>ToolRepoAssociationsDict</c> and, if it is added, to
 /// <c>Repo.ToolAssociations</c>.
 /// </summary>
 /// <param name="info">The tool information holding the association.</param>
 /// <returns>
 /// <c>true</c> if the association is added; <c>false</c> if the
 /// dictionary already has an entry under the same formatted name.
 /// </returns>
 private bool TryAddToolRepoAssociations(DeserializedInfo info)
 {
     var association = info.ToolRepoAssociation;
     var key = FormatToolRepoAssociationName(association.Tool);

     if (!ToolRepoAssociationsDict.TryAdd(key, association))
     {
         Logger.LogDebug($"Association between Tool {info.ToolRepoAssociation.Tool.Name} and Repository {Repo.Name} already exists.");
         return false;
     }

     Repo.ToolAssociations.Add(association);
     return true;
 }
Exemple #8
0
 /// <summary>
 /// Downloads the <c>tip.zip</c> archive of the given tool from the
 /// ToolShed and stores it at <c>info.ArchiveFilename</c>.
 /// </summary>
 /// <param name="info">The tool whose archive is downloaded.</param>
 /// <returns>
 /// <paramref name="info"/>; or <c>null</c> if <paramref name="info"/>
 /// is <c>null</c> or the download throws a <see cref="WebException"/>.
 /// </returns>
 private DeserializedInfo Downloader(DeserializedInfo info)
 {
     // Consistent with the other processing steps
     // (WrapperExtractor, ExtractPublications): a null input yields null.
     if (info == null)
     {
         return null;
     }

     var toolName = info.ToolRepoAssociation.Tool.Name;

     try
     {
         Logger.LogDebug($"Downloading archive of {toolName}.");

         // Note: a new WebClient is created for every call rather than
         // sharing one, because a WebClient instance cannot download
         // multiple files concurrently.
         using var client = new WebClient();
         client.DownloadFile(
             address: new Uri(
                 $"https://toolshed.g2.bx.psu.edu/repos/" +
                 $"{info.ToolRepoAssociation.Owner}/{toolName}/" +
                 $"archive/tip.zip"),
             fileName: info.ArchiveFilename);
         Logger.LogDebug($"Successfully downloaded archive of {toolName}.");
         return info;
     }
     catch (WebException e)
     {
         Logger.LogDebug($"Failed downloading archive of {toolName}: {e.Message}");
         return null;
     }
 }
Exemple #9
0
        /// <summary>
        /// Reads every file listed in <c>info.XMLFiles</c>, collects the
        /// publications cited in its <c>citations</c> elements (items whose
        /// <c>type</c> attribute is DOI or BibTeX, case-insensitive), assigns
        /// them to <c>info.ToolPubAssociations</c>, and calls
        /// <c>TryAddEntities</c> once per file.
        /// </summary>
        /// <param name="info">The tool whose extracted XML files are read.</param>
        /// <returns>
        /// <paramref name="info"/>; or <c>null</c> if <paramref name="info"/>
        /// is <c>null</c> or loading any of the files throws an
        /// <see cref="System.Xml.XmlException"/>.
        /// </returns>
        private DeserializedInfo ExtractPublications(DeserializedInfo info)
        {
            if (info == null)
            {
                return null;
            }

            foreach (var xmlPath in info.XMLFiles)
            {
                var xmlLabel = Path.GetFileNameWithoutExtension(xmlPath);

                // The tool name is read anew for every message, because
                // TryAddEntities (called below) may modify it.
                Logger.LogDebug(
                    $"Extracting publication info from XML file {xmlLabel} of tool {info.ToolRepoAssociation.Tool.Name}.");

                try
                {
                    var document  = XElement.Load(xmlPath);
                    var collected = new List <ToolPublicationAssociation>();

                    foreach (var citation in document.Elements("citations").Descendants())
                    {
                        var typeAttribute = citation.Attribute("type");
                        if (typeAttribute == null)
                        {
                            continue;
                        }

                        var citationType = typeAttribute.Value.Trim().ToUpperInvariant();
                        if (citationType == "DOI")
                        {
                            // Some tools have one BibItem that contains only DOI, and
                            // another BibItem that contains publication info, whereas
                            // there should be only one BibItem per publication holding
                            // both. NOTE(review): the original note says only the
                            // DOI item is considered for such tools; no such
                            // filtering is visible in this method -- confirm.
                            var doiPublication = new Publication() { DOI = citation.Value };
                            collected.Add(new ToolPublicationAssociation() { Publication = doiPublication });
                        }
                        else if (citationType == "BIBTEX")
                        {
                            try
                            {
                                if (TryParseBibitem(citation.Value, out Publication parsed))
                                {
                                    collected.Add(new ToolPublicationAssociation() { Publication = parsed });
                                }
                            }
                            catch (ArgumentException parseError)
                            {
                                Logger.LogDebug(
                                    $"Error extracting publication from XML file of tool {info.ToolRepoAssociation.Tool.Name}:{parseError.Message}");
                            }
                        }
                    }

                    Logger.LogDebug(
                        $"Successfully extract publication info from XML file {xmlLabel} of tool {info.ToolRepoAssociation.Tool.Name}.");

                    info.ToolPubAssociations = collected;
                    TryAddEntities(info);
                }
                catch (System.Xml.XmlException xmlError)
                {
                    // This exception may happen if the XML
                    // file has multiple roots.
                    Logger.LogDebug(
                        $"Failed extracting publication info from XML file {xmlLabel} of tool {info.ToolRepoAssociation.Tool.Name}: {xmlError.Message}");

                    return null;
                }
            }

            return info;
        }