コード例 #1
0
        /// <summary>
        /// Expands the paged URL patterns found in
        /// <c>data.DefaultWebsiteConfiguration.WebsitePatternConfig</c> into concrete
        /// URL / request-body pairs and appends them, grouped into
        /// <see cref="WebsiteInformation"/> entries, to <c>input.Websites</c>.
        /// </summary>
        /// <param name="data">
        /// User input carrying the default website configuration. When <paramref name="data"/>,
        /// its default configuration, the pattern configuration or the pattern settings are
        /// <c>null</c>, the method returns without touching <paramref name="input"/>.
        /// </param>
        /// <param name="input">
        /// Scrape input that supplies the optional paging limit and receives the generated websites.
        /// </param>
        /// <remarks>
        /// Exceptions are caught and written to <c>Logger</c>; none are propagated to the caller.
        /// </remarks>
        public void SetWebsitePatternConfiguration(WebScrapeUserInput data, ref WebScrapeInput input)
        {
            try
            {
                // A missing configuration at any level means there is nothing to expand, so
                // return quietly instead of raising (and logging) a NullReferenceException.
                var defaultConfig = data?.DefaultWebsiteConfiguration;
                var patternConfig = defaultConfig?.WebsitePatternConfig;

                if (patternConfig == null || patternConfig.PatternSettings == null)
                {
                    return;
                }

                int itemsPerPage = patternConfig.ItemsPerPage;

                // The page count is computed by dividing by this value; reject it up front
                // with a clear message rather than surfacing a DivideByZeroException.
                if (itemsPerPage <= 0)
                {
                    Logger.Write("SetWebsitePatternConfiguration -- ScrapeProductsListInput -> DataGrabber. ItemsPerPage must be greater than zero; no pattern URLs were generated.");
                    return;
                }

                var isGet = patternConfig.HttpVerb == HttpVerbType.GET;
                var urls  = new List <UrlPair>();

                // loop through all settings
                foreach (var setting in patternConfig.PatternSettings)
                {
                    var urlPattern = setting.UrlPattern;
                    int
                        totalItems  = setting.TotalItems,
                        startIndex  = patternConfig.StartIndex,
                        currentPage = patternConfig.CurrentPage;

                    // get total loop count
                    // NOTE(review): integer division drops a trailing partial page
                    // (e.g. 25 items at 10 per page yields 2 pages) -- confirm this is intended.
                    var totalPage = totalItems / itemsPerPage;

                    // limit paging if applicable
                    if (input.ShouldLimitPaging)
                    {
                        totalPage = totalPage > input.PagingLimit ? input.PagingLimit : totalPage;
                    }

                    for (int pageIndex = 0; pageIndex < totalPage; pageIndex++)
                    {
                        var url  = urlPattern.Clone().ToString();
                        var body = string.Empty;

                        if (isGet)
                        {
                            // replace key words in url
                            url = url.Replace("{StartIndex}", startIndex.ToString());
                            url = url.Replace("{ItemsPerPage}", itemsPerPage.ToString());
                            url = url.Replace("{CurrentPage}", currentPage.ToString());

                            // add custom page number to url
                            // NOTE(review): appended with '&', so this presumably expects the
                            // pattern to already contain a query string -- verify.
                            url += $"&utm_source_page={pageIndex + 1}";
                        }
                        else // if(patternConfig.HttpVerb == HttpVerbType.POST)
                        {
                            // form body: "?" followed by the '&'-separated name='value' pairs
                            var bodyParts = new List <string>();

                            if (setting.RequestBodyParams != null)
                            {
                                foreach (var param in setting.RequestBodyParams)
                                {
                                    bodyParts.Add($"{param.ParamName}='{param.Value}'");
                                }
                            }

                            body = "?" + string.Join("&", bodyParts);

                            // replace key words in body
                            body = body.Replace("{StartIndex}", startIndex.ToString());
                            body = body.Replace("{ItemsPerPage}", itemsPerPage.ToString());
                            body = body.Replace("{CurrentPage}", currentPage.ToString());
                        }

                        // add the url to list
                        urls.Add(new UrlPair {
                            Url = url, RequestBody = body
                        });

                        // advance the paging counters for the next generated request
                        currentPage++;
                        startIndex = ((pageIndex + 1) * itemsPerPage) + patternConfig.StartIndex;
                    }
                }

                if (urls.Count == 0)
                {
                    return;
                }

                // Distribute the generated requests over several website entries.
                var siteURLs  = urls.ToArray();
                int splitSize = GetSplitSize(siteURLs);
                var arrays    = siteURLs.Split(splitSize);

                foreach (var arr in arrays)
                {
                    var URLs = new List <KeyValuePair <Identifier, UrlPair> >();

                    // pair every url with an (empty) identifier
                    foreach (var url in arr.ToArray())
                    {
                        URLs.Add(new KeyValuePair <Identifier, UrlPair>(
                                     new Identifier {
                            UniqueID = string.Empty, MappingID = string.Empty
                        },
                                     new UrlPair {
                            Url = url.Url, RequestBody = url.RequestBody
                        }
                                     ));
                    }

                    WebsiteInformation website = new WebsiteInformation()
                    {
                        Name          = defaultConfig.WebsiteNamePrefix,
                        URL           = defaultConfig.WebsiteURL,
                        webScrapeType = defaultConfig.webScrapeType,
                        HttpVerb      = patternConfig.HttpVerb,
                        URLs          = URLs
                    };

                    // websites without a name are not registered
                    if (!string.IsNullOrEmpty(website.Name))
                    {
                        input.Websites.Add(website);
                    }
                }
            }
            catch (Exception ex)
            {
                Logger.Write("Exception in SetWebsitePatternConfiguration -- ScrapeProductsListInput -> DataGrabber. Message: " + ex.Message);
            }
        }
コード例 #2
0
        /// <summary>
        /// Fills <c>input.Websites</c> from the default website configuration of
        /// <paramref name="data"/>.
        /// </summary>
        /// <param name="data">User input carrying the default website configuration.</param>
        /// <param name="input">Scrape input whose website list receives the new entries.</param>
        /// <remarks>
        /// <c>SetWebsitePatternConfiguration</c> is invoked first. Afterwards, depending on the
        /// configured scrape type, either the single website (plus one POST entry per URL/body
        /// pair) or chunked groups of the configured URLs are added. If the website list is
        /// still empty for the multiple-URL type, a bare default entry is added. Entries with
        /// an empty name are never added. Exceptions are logged and not propagated.
        /// </remarks>
        public void SetDefaultConfiguration(WebScrapeUserInput data, ref WebScrapeInput input)
        {
            // Pattern-generated websites are registered before the explicitly configured ones.
            SetWebsitePatternConfiguration(data, ref input);

            try
            {
                var config = data.DefaultWebsiteConfiguration;

                if (config == null)
                {
                    return;
                }

                var scrapeType = config.WebScrapeType;

                if (scrapeType == ScrapeType.SingleURL)
                {
                    // The configured website itself is requested with GET.
                    var baseSite = new WebsiteInformation();
                    baseSite.Name          = config.WebsiteNamePrefix;
                    baseSite.URL           = config.WebsiteURL;
                    baseSite.webScrapeType = config.webScrapeType;
                    baseSite.HttpVerb      = HttpVerbType.GET;

                    if (!string.IsNullOrEmpty(baseSite.Name))
                    {
                        input.Websites.Add(baseSite);
                    }

                    // TODO: move to multiple URLs
                    if (config.WebsiteURLsWithBody != null)
                    {
                        // Every URL/body pair becomes its own POST website.
                        foreach (var pair in config.WebsiteURLsWithBody)
                        {
                            var postSite = new WebsiteInformation();
                            postSite.Name          = config.WebsiteNamePrefix;
                            postSite.URL           = pair.Url;
                            postSite.RequestBody   = pair.Body;
                            postSite.webScrapeType = config.webScrapeType;
                            postSite.HttpVerb      = HttpVerbType.POST;

                            if (string.IsNullOrEmpty(postSite.Name))
                            {
                                continue;
                            }

                            input.Websites.Add(postSite);
                        }
                    }
                }
                else if (scrapeType == ScrapeType.MultipleURLs)
                {
                    var configuredUrls = config.WebsiteURLs;

                    if (configuredUrls != null && configuredUrls.Length > 0)
                    {
                        // Break the URL list into chunks; each chunk yields one website entry.
                        int chunkSize = GetSplitSize(configuredUrls);
                        var chunks    = configuredUrls.Split(chunkSize);

                        foreach (var chunk in chunks)
                        {
                            var identifiedUrls = new List <KeyValuePair <Identifier, UrlPair> >();

                            // Pair every URL with an (empty) identifier.
                            foreach (var address in chunk.ToArray())
                            {
                                var identifier = new Identifier {
                                    UniqueID = string.Empty, MappingID = string.Empty
                                };
                                var urlPair = new UrlPair {
                                    Url = address
                                };

                                identifiedUrls.Add(new KeyValuePair <Identifier, UrlPair>(identifier, urlPair));
                            }

                            var groupedSite = new WebsiteInformation();
                            groupedSite.Name          = config.WebsiteNamePrefix;
                            groupedSite.URL           = config.WebsiteURL;
                            groupedSite.webScrapeType = config.webScrapeType;
                            groupedSite.HttpVerb      = HttpVerbType.GET;
                            groupedSite.URLs          = identifiedUrls;

                            if (!string.IsNullOrEmpty(groupedSite.Name))
                            {
                                input.Websites.Add(groupedSite);
                            }
                        }
                    }
                }

                // Fall back to a bare default entry when nothing was registered for multiple URLs.
                if (input.Websites.Count == 0 && config.WebScrapeType == ScrapeType.MultipleURLs)
                {
                    var fallbackSite = new WebsiteInformation();
                    fallbackSite.Name          = config.WebsiteNamePrefix;
                    fallbackSite.URL           = config.WebsiteURL;
                    fallbackSite.webScrapeType = config.webScrapeType;

                    if (!string.IsNullOrEmpty(fallbackSite.Name))
                    {
                        input.Websites.Add(fallbackSite);
                    }
                }
            }
            catch (Exception ex)
            {
                Logger.Write($"Exception in SetDefaultConfiguration -- ScrapeProductsListInput -> DataGrabber. Message: {ex.Message}");
            }
        }