/// <summary>
/// Reads the total result count out of the search results overview markup.
/// </summary>
/// <param name="node">Node that is searched for an element carrying the "search-count" CSS class.</param>
/// <returns>
/// A new <see cref="SearchResultOverview"/>. When a "search-count" element is found, its
/// <c>SearchCount</c> is set to the parsed number, or to 0 if the text is not a valid integer.
/// When no such element is found, <c>SearchCount</c> is left untouched.
/// </returns>
SearchResultOverview scrapeSearchResultsOverview(HtmlNode node)
{
    /*
     * Markup this method expects:
     * <p class="result-overview">
     *   <strong class="search-count">132,700</strong> Results
     * </p>
     */
    var overview = new SearchResultOverview();

    var countNode = node.GetNodesByCssClass("search-count");
    if (countNode == null)
    {
        return overview;
    }

    // Strip the comma separators before parsing ("132,700" -> "132700").
    var digits = countNode.InnerText.Replace(",", String.Empty);

    int count;
    var parsed = int.TryParse(digits, out count);
    if (!parsed)
    {
        //TODO: Log this as a warning
    }

    // TryParse sets count to 0 when parsing fails, so 0 is stored in that case.
    overview.SearchCount = count;
    return overview;
}
/// <summary>
/// Selects the items whose lot price is at least the smallest of the per-source maximum lot prices.
/// </summary>
/// <param name="results">Search results whose <c>Items</c> are examined.</param>
/// <returns>
/// A <see cref="SearchFormatterResult"/> with <c>UncertainItems</c> populated and <c>Results</c> set to null.
/// <c>UncertainItems</c> is empty when <paramref name="results"/> contains no items.
/// </returns>
public static SearchFormatterResult ComputeUncertain(SearchResultOverview results)
{
    // Highest lot price seen for each source.
    var maxes = new Dictionary<string, decimal>();
    foreach (var item in results.Items)
    {
        decimal currentMax;
        if (!maxes.TryGetValue(item.Source, out currentMax) || currentMax < item.LotPrice)
        {
            maxes[item.Source] = item.LotPrice;
        }
    }

    // Min() throws InvalidOperationException on an empty sequence. With no items the
    // threshold value is irrelevant because the filter below has nothing to select.
    decimal minMax = maxes.Count > 0 ? maxes.Values.Min() : decimal.MaxValue;

    // Materialise the selection so it is evaluated once, against the threshold computed above.
    var uncertainItems = results.Items.Where(x => x.LotPrice >= minMax).ToList();

    return new SearchFormatterResult()
    {
        UncertainItems = uncertainItems,
        Results = null
    };
}
/// <summary>
/// Parses the HTML of a search results page into a <see cref="SearchResultOverview"/>.
/// Also stores the extracted paging key in <c>PagingKey</c>.
/// </summary>
/// <param name="html">Raw HTML of the results page.</param>
/// <returns>
/// The overview with one decoded item per "listitem" element and the paging key added to <c>Extra</c>;
/// an overview with a zero count and an empty item list when the page contains a "zero1" element;
/// or a newly constructed overview when any exception is thrown while decoding.
/// </returns>
public SearchResultOverview DecodePage(string html)
{
    try
    {
        var document = new HtmlDocument();
        document.LoadHtml(html);

        // A "zero1" element is treated as a page without results.
        if (document.DocumentNode.GetNodesByCssClass("zero1") != null)
        {
            return new SearchResultOverview()
            {
                SearchCount = 0,
                Items = new List<Item>()
            };
        }

        var overview = extractResultsOverview(document.DocumentNode);
        PagingKey = extractPagingKey(document.DocumentNode);

        // Walk every descendant and decode those whose class attribute contains "listitem"
        // (substring match on the attribute value).
        foreach (var candidate in document.DocumentNode.Descendants())
        {
            if (!candidate.Attributes.Contains("class"))
            {
                continue;
            }

            if (!candidate.Attributes["class"].Value.Contains("listitem"))
            {
                continue;
            }

            overview.Items.Add(decodeItemnode(candidate));
        }

        overview.Extra.Add("PagingKey", PagingKey);
        return overview;
    }
    catch (Exception)
    {
        //TODO: Log this as a warning, high priority, make sure that we aren't returning nothing for accurate searches
        return new SearchResultOverview();
    }
}
/// <summary>
/// Starts a search on every supplied service, waits for all of them to finish and combines the outcomes.
/// </summary>
/// <param name="services">Services whose <c>SearchItems()</c> is invoked.</param>
/// <returns>
/// A new <see cref="SearchResultOverview"/> whose <c>SearchCount</c> is the sum of the individual
/// counts and whose <c>Items</c> holds the items of every service, in service order.
/// </returns>
public static async Task<SearchResultOverview> Search(WebSearchService[] services)
{
    var combined = new SearchResultOverview();

    // Kick off all searches before awaiting any of them.
    var pending = new List<Task<SearchResultOverview>>();
    for (var index = 0; index < services.Length; index++)
    {
        pending.Add(services[index].SearchItems());
    }

    SearchResultOverview[] finished = await Task.WhenAll(pending);

    for (var index = 0; index < finished.Length; index++)
    {
        var partial = finished[index];
        combined.SearchCount += partial.SearchCount;
        combined.Items.AddRange(partial.Items);
    }

    return combined;
}
/// <summary>
/// Returns the results for a search, using the cached copy when one exists and otherwise
/// running the search, formatting it and caching the formatted results.
/// </summary>
/// <param name="search">Criteria to search for; also used to derive the cache key.</param>
/// <returns>
/// The cached or freshly formatted results. Any exception raised inside the try block is reported
/// to raven and swallowed, so the result is null if the failure happened before results were obtained.
/// </returns>
public async Task<SearchResultOverview> SearchItems(SearchCriteria search)
{
    // Key under which a cached item list for these criteria would be stored.
    string cacheKey = RedisKeyConvert.Serialize(search);
    SearchResultOverview outcome = null;

    try
    {
        storeSearchHistory(search);

        if (await cache.Exists(cacheKey))
        {
            // Cache hit: rebuild the overview from the stored JSON.
            var json = await cache.GetString(cacheKey);
            outcome = JsonConvert.DeserializeObject<SearchResultOverview>(json);
        }
        else
        {
            // Cache miss: run the search, format it, then cache what was formatted.
            await SearchServiceProvider.RetrieveSearchServices(services, cache, search, allNew: true);
            var fresh = await SearchDispatcher.Search(services, new int[] { search.Page });
            var formatted = SearchFormatter.FormatResults(0, fresh);
            await searchCache.CacheSearch(search, services, formatted.Results);
            outcome = formatted.Results;
        }

        searchCache.StartCacheJob(search, null);
    }
    catch (Exception e)
    {
        await raven.CaptureNetCoreEventAsync(e);
    }

    return outcome;
}
/// <summary>
/// Sorts the items by their first price (ascending) and cuts out a single page of them.
/// </summary>
/// <param name="page">
/// Page to return. <c>(page - 1) * ResultsPerPage</c> items are skipped; a negative skip count
/// is treated by <c>Enumerable.Skip</c> as "skip nothing".
/// </param>
/// <param name="results">Results to format; <c>Extra</c> and <c>SearchCount</c> are carried over unchanged.</param>
/// <returns>
/// A <see cref="SearchFormatterResult"/> whose <c>Results</c> holds the page and whose
/// <c>UncertainItems</c> is null.
/// </returns>
public static SearchFormatterResult FormatResults(int page, SearchResultOverview results)
{
    var offset = (page - 1) * ResultsPerPage;

    var pageOfResults = new SearchResultOverview()
    {
        Extra = results.Extra,
        Items = results.Items,
        SearchCount = results.SearchCount
    };

    var ordered = pageOfResults.Items.OrderBy(entry =>
    {
        // Items without any price sort last.
        var hasPrice = entry.Price != null && entry.Price.Length > 0;
        return hasPrice ? entry.Price[0] : decimal.MaxValue;
    });

    pageOfResults.Items = ordered
        .Skip(offset)
        .Take(ResultsPerPage)
        .ToList();

    return new SearchFormatterResult()
    {
        UncertainItems = null,
        Results = pageOfResults
    };
}
/// <summary>
/// Runs the search described by <c>ServiceModel</c> against DHGate and merges the decoded pages
/// into a single overview.
/// </summary>
/// <returns>
/// The merged overview. A newly constructed <see cref="SearchResultOverview"/> is returned when the
/// first-page request fails or when no page was decoded at all.
/// </returns>
public override async Task<SearchResultOverview> SearchItems()
{
    var search = this.ServiceModel;
    var resultTasks = new List<Task<string>>();
    SearchResultOverview overallResult = null;

    //Get the first page if we have no pagekey
    if (string.IsNullOrEmpty(this.ServiceModel.PageKey))
    {
        var firstPageEndpoint = new DHGateQueryString().Convert(search.Criteria);
        try
        {
            var firstPageHtml = await (await http.Get(firstPageEndpoint)).Content.ReadAsStringAsync();
            overallResult = new DHGatePageDecoder().DecodePage(firstPageHtml);
            this.ServiceModel.PopulateState(overallResult);
        }
        catch (Exception e)
        {
            await raven.CaptureNetCoreEventAsync(e);
            return new SearchResultOverview();
        }
    }

    foreach (int page in search.Pages)
    {
        // Skip page 0 and any page below the model's current page. search.Page is advanced
        // below, so the comparison is always against the last page that was requested.
        if (page < search.Page || page == 0)
        {
            continue;
        }

        search.Page = page;
        var pageEndpoint = new DHGateQueryString().ConvertWithPage(search, this.ServiceModel.PageKey);
        try
        {
            // The request itself is awaited here; only reading the body is left pending.
            resultTasks.Add((await http.Get(pageEndpoint)).Content.ReadAsStringAsync());
        }
        catch (Exception e)
        {
            await raven.CaptureNetCoreEventAsync(e);
        }
    }

    // Await each body on its own: a single failed read is reported and skipped instead of
    // throwing out of the method and discarding the pages that did succeed.
    foreach (var pendingBody in resultTasks)
    {
        string pageHtml;
        try
        {
            pageHtml = await pendingBody;
        }
        catch (Exception e)
        {
            await raven.CaptureNetCoreEventAsync(e);
            continue;
        }

        var oneResult = new DHGatePageDecoder().DecodePage(pageHtml);
        if (overallResult == null)
        {
            // First decoded page becomes the base result and seeds the service state.
            overallResult = oneResult;
            this.ServiceModel.PopulateState(oneResult);
        }
        else
        {
            overallResult.Items.AddRange(oneResult.Items);
        }
    }

    return (overallResult != null) ? overallResult : new SearchResultOverview();
}
/// <summary>
/// Function that actually goes out and gets the Aliexpress search and converts it:
/// requests every page listed in <paramref name="search"/>, scrapes each response and merges the items.
/// </summary>
/// <param name="search">Service model supplying the pages to fetch; its <c>Page</c> is overwritten per request.</param>
/// <returns>
/// The merged overview, or a newly constructed <see cref="SearchResultOverview"/> when no page
/// could be scraped. Non-empty item lists are passed to <c>StoreSearchItems</c> before returning.
/// </returns>
async Task<SearchResultOverview> searchItems(SearchServiceModel search)
{
    var resultTasks = new List<Task<string>>();
    SearchResultOverview results = null;

    foreach (var page in search.Pages)
    {
        search.Page = page;
        string endpoint = new AliexpressQueryString().Convert(search);

        #region Error Tracking
        //Add breadcrumb for error monitoring
        var crumb = new Breadcrumb("AliexpressService")
        {
            Message = $"GET {endpoint}",
            Data = new Dictionary<string, string>()
            {
                { "Aliexpress URL", endpoint }
            }
        };
        raven.AddTrail(crumb);
        #endregion

        try
        {
            // The request itself is awaited here; only reading the body is left pending.
            resultTasks.Add((await http.Get(endpoint)).Content.ReadAsStringAsync());
        }
        catch (Exception e)
        {
            var sentry = new SentryEvent(e);
            await raven.CaptureNetCoreEventAsync(sentry);
        }
    }

    // Await each body inside the per-response try block: a failed read is reported like a
    // failed scrape instead of throwing out of the method and losing the other pages.
    foreach (var pendingBody in resultTasks)
    {
        try
        {
            var response = await pendingBody;
            var result = new AliexpressPageDecoder().ScrapeSearchResults(response);
            if (results == null)
            {
                // First scraped page becomes the base result and seeds the service state.
                results = result;
                this.ServiceModel.PopulateState(results);
            }
            else
            {
                results.Items.AddRange(result.Items);
            }
        }
        catch (Exception e)
        {
            var sentry = new SentryEvent(e);
            await raven.CaptureNetCoreEventAsync(sentry);
        }
    }

    if (results != null && results.Items.Count > 0)
    {
        StoreSearchItems(results.Items);
    }

    return (results != null) ? results : new SearchResultOverview();
}
/// <summary>
/// Caches a search result together with the service models that produced it.
/// Failures are reported to raven and not rethrown.
/// </summary>
/// <param name="criteria">Criteria the result belongs to; passed on to <c>cacheItemSearch</c>.</param>
/// <param name="services">Services passed on to <c>cacheServices</c>.</param>
/// <param name="result">Result to cache.</param>
public async Task CacheSearch(SearchCriteria criteria, WebSearchService[] services, SearchResultOverview result)
{
    // No cache key is computed here: cacheItemSearch derives it from the criteria itself,
    // and doing so inside the try block keeps a serialization failure within the error reporting.

    //Cache the search & service models
    try
    {
        await cacheItemSearch(criteria, result);
        await cacheServices(services);
    }
    catch (Exception e)
    {
        var sentry = new SentryEvent(e);
        sentry.Message = $"Error when saving to cache: {e.Message}";
        await raven.CaptureNetCoreEventAsync(sentry);
    }
}
/// <summary>
/// Stores a search result in the cache as JSON, under the key derived from the criteria.
/// </summary>
/// <param name="criteria">Criteria serialized into the cache key.</param>
/// <param name="result">Result serialized to JSON and stored.</param>
async Task cacheItemSearch(SearchCriteria criteria, SearchResultOverview result)
{
    var cacheKey = RedisKeyConvert.Serialize(criteria);
    var payload = JsonConvert.SerializeObject(result);

    await cache.StoreString(cacheKey, payload);
}