Beispiel #1
0
        /// <summary>
        /// Reads the target URL list and the sender binding from configuration, downloads each
        /// target URL in turn and publishes the downloaded text to the configured exchange
        /// with the configured routing keys.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so callers cannot await it or observe exceptions it
        /// throws; the signature is kept unchanged for compatibility with existing callers.
        /// </remarks>
        /// <exception cref="System.InvalidOperationException">
        /// The target URL list is missing or blank, or the sender routing keys are missing.
        /// </exception>
        public async void Start()
        {
            var applicationConfiguration = configurationSection.GetSection(ApplicationConfigurationSectionName);
            var targetUrlsEntriesAsString = applicationConfiguration.GetValue<string>(TargetUrlsConfigurationSection);

            // GetValue returns null for an absent key; fail with a message that names the key
            // instead of a NullReferenceException from Split.
            if (string.IsNullOrWhiteSpace(targetUrlsEntriesAsString))
            {
                throw new System.InvalidOperationException(
                    "Configuration value '" + TargetUrlsConfigurationSection + "' is missing or empty.");
            }

            var targetUrlsEntries = targetUrlsEntriesAsString.Split(TargetUrlsSplitChar);

            var bindingConfiguration = configurationSection.GetSection(BindingConfigurationSectionName);

            var senderExchange = bindingConfiguration.GetValue<string>(SenderExchangeConfigurationName);
            var senderRoutingKeysAsString = bindingConfiguration.GetValue<string>(SenderRoutingKeysConfigurationName);

            // Only null is rejected: an empty string still splits into a single empty routing key,
            // exactly as before.
            if (senderRoutingKeysAsString == null)
            {
                throw new System.InvalidOperationException(
                    "Configuration value '" + SenderRoutingKeysConfigurationName + "' is missing.");
            }

            var senderRoutingKeysAsArray = senderRoutingKeysAsString.Split(routingKeySplitChar);

            foreach (var entry in targetUrlsEntries)
            {
                // A trailing or doubled separator produces blank entries; there is nothing to download for them.
                if (string.IsNullOrWhiteSpace(entry))
                {
                    continue;
                }

                // Pages are fetched one after another; surrounding whitespace in the list is not part of the URL.
                var pageDataAsString = await dataRetriever.RetrieveStringAsync(entry.Trim());

                // NOTE(review): ASCII encoding replaces every non-ASCII character with '?'.
                // Kept so the bytes on the wire stay the same for existing consumers; consider UTF-8.
                rmqPublisher.Publish(
                    Encoding.ASCII.GetBytes(pageDataAsString),
                    senderExchange,
                    senderRoutingKeysAsArray);
            }
        }
        /// <summary>
        /// Downloads the page at <paramref name="urlToParse"/> and collects the <c>src</c> link of
        /// every <c>img</c> element it contains, resolving relative links against the page URL.
        /// </summary>
        /// <param name="urlToParse">
        /// URL of the page to download. It must be an absolute URI when the page contains
        /// relative image links, because it is used as the base for resolving them.
        /// </param>
        /// <returns>
        /// The image links as absolute URI strings; an empty list when the page has no matching
        /// <c>img</c> elements. Links that cannot be parsed as a URI are skipped.
        /// </returns>
        public async Task<List<string>> Extract(string urlToParse)
        {
            var doc = new HtmlDocument();
            doc.LoadHtml(await dataRetriever.RetrieveStringAsync(urlToParse));

            var imageLinks = new List<string>();

            // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when nothing
            // matches, so a page without images must be handled before enumerating.
            var allImageNodes = doc.DocumentNode.SelectNodes("//img/@src");
            if (allImageNodes == null)
            {
                return imageLinks;
            }

            // Built on first use only, so a page whose links are all absolute never parses urlToParse
            // (same as before), and a page with many relative links parses it just once.
            Uri pageUri = null;

            foreach (HtmlNode node in allImageNodes)
            {
                var rawImageLink = node.Attributes["src"];
                if (rawImageLink == null)
                {
                    continue;
                }

                // One malformed src must not abort the extraction and lose the links already collected.
                Uri imageLink;
                if (!Uri.TryCreate(rawImageLink.Value, UriKind.RelativeOrAbsolute, out imageLink))
                {
                    continue;
                }

                if (!imageLink.IsAbsoluteUri)
                {
                    if (pageUri == null)
                    {
                        pageUri = new Uri(urlToParse);
                    }

                    Uri resolvedLink;
                    if (!Uri.TryCreate(pageUri, imageLink, out resolvedLink))
                    {
                        continue;
                    }

                    imageLink = resolvedLink;
                }

                imageLinks.Add(imageLink.ToString());
            }

            return imageLinks;
        }