/// <summary>
/// Expands the paged URL patterns found in
/// <c>data.DefaultWebsiteConfiguration.WebsitePatternConfig</c> into concrete page
/// requests and appends them, in batches, to <c>input.Websites</c>.
/// </summary>
/// <remarks>
/// When the configured verb is GET, the placeholders <c>{StartIndex}</c>,
/// <c>{ItemsPerPage}</c> and <c>{CurrentPage}</c> are substituted in the URL and a
/// <c>utm_source_page</c> parameter carrying the 1-based page number is appended.
/// For any other verb the URL is left untouched and the placeholders are substituted
/// in a request body assembled from the setting's <c>RequestBodyParams</c>.
/// The method returns without adding anything when the default configuration, the
/// pattern configuration or its settings are missing. Exceptions raised while
/// building the list are caught and reported through <c>Logger.Write</c>.
/// </remarks>
/// <param name="data">User input holding the default website configuration.</param>
/// <param name="input">Scrape input whose <c>Websites</c> list receives the generated entries.</param>
public void SetWebsitePatternConfiguration(WebScrapeUserInput data, ref WebScrapeInput input)
{
    try
    {
        // A missing configuration is a normal "nothing to do" state, not an error.
        var defaultConfig = data?.DefaultWebsiteConfiguration;
        var patternConfig = defaultConfig?.WebsitePatternConfig;
        if (patternConfig == null || patternConfig.PatternSettings == null)
        {
            return;
        }

        int itemsPerPage = patternConfig.ItemsPerPage;
        if (itemsPerPage == 0)
        {
            // The page count is TotalItems / ItemsPerPage; refuse explicitly instead of
            // relying on a DivideByZeroException being swallowed below.
            Logger.Write("SetWebsitePatternConfiguration -- ItemsPerPage is 0; no paged URLs can be generated.");
            return;
        }

        bool isGet = patternConfig.HttpVerb == HttpVerbType.GET;
        var urls = new List<UrlPair>();

        // loop through all settings
        foreach (var setting in patternConfig.PatternSettings)
        {
            var urlPattern = setting.UrlPattern;

            // Paging counters restart from the configured values for every setting.
            int startIndex = patternConfig.StartIndex;
            int currentPage = patternConfig.CurrentPage;

            // get total loop count
            // NOTE(review): integer division drops a trailing partial page (e.g. 25 items at
            // 10 per page yields 2 pages) -- confirm whether rounding up is intended.
            int totalPage = setting.TotalItems / itemsPerPage;

            // limit paging if applicable
            if (input.ShouldLimitPaging)
            {
                totalPage = Math.Min(totalPage, input.PagingLimit);
            }

            for (int ind = 0; ind < totalPage; ind++)
            {
                var url = urlPattern.Clone().ToString();
                var body = string.Empty;

                if (isGet)
                {
                    // replace key words in url
                    url = ReplacePagingTokens(url, startIndex, itemsPerPage, currentPage);

                    // add custom page number to url
                    // NOTE(review): assumes the pattern already contains a query string ('?') -- confirm.
                    url += $"&utm_source_page={ind + 1}";
                }
                else
                {
                    // form body: "?" followed by name='value' pairs joined with '&'
                    var bodyParts = new List<string>();
                    if (setting.RequestBodyParams != null)
                    {
                        foreach (var param in setting.RequestBodyParams)
                        {
                            bodyParts.Add($"{param.ParamName}='{param.Value}'");
                        }
                    }

                    // replace key words in body
                    body = ReplacePagingTokens("?" + string.Join("&", bodyParts), startIndex, itemsPerPage, currentPage);
                }

                // add the url to list
                urls.Add(new UrlPair { Url = url, RequestBody = body });

                currentPage++;
                startIndex = ((ind + 1) * itemsPerPage) + patternConfig.StartIndex;
            }
        }

        if (urls.Count == 0)
        {
            return;
        }

        var siteURLs = urls.ToArray();
        int splitSize = GetSplitSize(siteURLs);

        // One WebsiteInformation entry is registered per batch of URLs.
        foreach (var arr in siteURLs.Split(splitSize))
        {
            var URLs = new List<KeyValuePair<Identifier, UrlPair>>();

            // pair every url with an identifier (both ids are left empty here)
            foreach (var url in arr.ToArray())
            {
                URLs.Add(new KeyValuePair<Identifier, UrlPair>(
                    new Identifier { UniqueID = string.Empty, MappingID = string.Empty },
                    new UrlPair { Url = url.Url, RequestBody = url.RequestBody }));
            }

            var website = new WebsiteInformation()
            {
                Name = defaultConfig.WebsiteNamePrefix,
                URL = defaultConfig.WebsiteURL,
                webScrapeType = defaultConfig.webScrapeType,
                HttpVerb = patternConfig.HttpVerb,
                URLs = URLs
            };

            // Entries without a name are not registered.
            if (!string.IsNullOrEmpty(website.Name))
            {
                input.Websites.Add(website);
            }
        }
    }
    catch (Exception ex)
    {
        Logger.Write("Exception in SetWebsitePatternConfiguration -- ScrapeProductsListInput -> DataGrabber. Message: " + ex.Message);
    }
}

/// <summary>
/// Replaces the <c>{StartIndex}</c>, <c>{ItemsPerPage}</c> and <c>{CurrentPage}</c>
/// placeholders in <paramref name="text"/> with the supplied values.
/// </summary>
/// <param name="text">URL or request body containing the placeholders.</param>
/// <param name="startIndex">Value substituted for <c>{StartIndex}</c>.</param>
/// <param name="itemsPerPage">Value substituted for <c>{ItemsPerPage}</c>.</param>
/// <param name="currentPage">Value substituted for <c>{CurrentPage}</c>.</param>
/// <returns>The text with all three placeholders substituted.</returns>
private static string ReplacePagingTokens(string text, int startIndex, int itemsPerPage, int currentPage)
{
    return text
        .Replace("{StartIndex}", startIndex.ToString())
        .Replace("{ItemsPerPage}", itemsPerPage.ToString())
        .Replace("{CurrentPage}", currentPage.ToString());
}
/// <summary>
/// Fills <c>input.Websites</c> from the default website configuration carried by
/// <paramref name="data"/>.
/// </summary>
/// <remarks>
/// <c>SetWebsitePatternConfiguration</c> runs first. Afterwards the configured scrape
/// type decides what is added: for <c>SingleURL</c> one GET entry plus one POST entry per
/// item in <c>WebsiteURLsWithBody</c>; for <c>MultipleURLs</c> one GET entry per batch of
/// <c>WebsiteURLs</c>. If the list is still empty and the type is <c>MultipleURLs</c>, a
/// single entry without URLs is added. Entries with an empty name are never added.
/// Exceptions are caught and reported through <c>Logger.Write</c>.
/// </remarks>
/// <param name="data">User input holding the default website configuration.</param>
/// <param name="input">Scrape input whose <c>Websites</c> list is extended.</param>
public void SetDefaultConfiguration(WebScrapeUserInput data, ref WebScrapeInput input)
{
    // Pattern-driven websites are registered before the scrape-type specific ones.
    SetWebsitePatternConfiguration(data, ref input);

    try
    {
        var config = data.DefaultWebsiteConfiguration;
        if (config == null)
        {
            return;
        }

        var scrapeType = config.WebScrapeType;

        if (scrapeType == ScrapeType.SingleURL)
        {
            // The configured site itself is fetched with GET.
            var primarySite = new WebsiteInformation
            {
                Name = config.WebsiteNamePrefix,
                URL = config.WebsiteURL,
                webScrapeType = config.webScrapeType,
                HttpVerb = HttpVerbType.GET
            };

            if (!string.IsNullOrEmpty(primarySite.Name))
            {
                input.Websites.Add(primarySite);
            }

            // TODO: move to multiple URLs
            var postTargets = config.WebsiteURLsWithBody;
            if (postTargets != null)
            {
                // Every URL that comes with a body becomes its own POST entry.
                foreach (var target in postTargets)
                {
                    var postSite = new WebsiteInformation
                    {
                        Name = config.WebsiteNamePrefix,
                        URL = target.Url,
                        RequestBody = target.Body,
                        webScrapeType = config.webScrapeType,
                        HttpVerb = HttpVerbType.POST
                    };

                    if (!string.IsNullOrEmpty(postSite.Name))
                    {
                        input.Websites.Add(postSite);
                    }
                }
            }
        }
        else if (scrapeType == ScrapeType.MultipleURLs)
        {
            var allUrls = config.WebsiteURLs;
            if (allUrls != null && allUrls.Length > 0)
            {
                int batchSize = GetSplitSize(allUrls);

                // One website entry is created per batch of URLs.
                foreach (var batch in allUrls.Split(batchSize))
                {
                    var identifiedUrls = new List<KeyValuePair<Identifier, UrlPair>>();

                    // pair every url with an identifier (both ids are left empty here)
                    foreach (var address in batch.ToArray())
                    {
                        var identifier = new Identifier { UniqueID = string.Empty, MappingID = string.Empty };
                        var pair = new UrlPair { Url = address };
                        identifiedUrls.Add(new KeyValuePair<Identifier, UrlPair>(identifier, pair));
                    }

                    var batchSite = new WebsiteInformation
                    {
                        Name = config.WebsiteNamePrefix,
                        URL = config.WebsiteURL,
                        webScrapeType = config.webScrapeType,
                        HttpVerb = HttpVerbType.GET,
                        URLs = identifiedUrls
                    };

                    if (!string.IsNullOrEmpty(batchSite.Name))
                    {
                        input.Websites.Add(batchSite);
                    }
                }
            }
        }

        // set default data if there are no websites
        if (input.Websites.Count == 0 && config.WebScrapeType == ScrapeType.MultipleURLs)
        {
            var fallbackSite = new WebsiteInformation
            {
                Name = config.WebsiteNamePrefix,
                URL = config.WebsiteURL,
                webScrapeType = config.webScrapeType
            };

            if (!string.IsNullOrEmpty(fallbackSite.Name))
            {
                input.Websites.Add(fallbackSite);
            }
        }
    }
    catch (Exception ex)
    {
        Logger.Write("Exception in SetDefaultConfiguration -- ScrapeProductsListInput -> DataGrabber. Message: " + ex.Message);
    }
}