public void Scrape()
{
    _scrapedShoes.Clear();
    ScrapeResponse serviceResponse = new ScrapeResponse();
    try
    {
        string[,] websites = WebScraperFactory.Websites;
        WebScraper scraper;
        ScrapePageResponse websiteResponse = new ScrapePageResponse();
        string[] website;
        for (int i = 0; i < WebScraperFactory.WebsiteCount; ++i)
        {
            // Create the scraper that matches this website and scrape it,
            // accumulating results into the shared shoe list.
            scraper = WebScraperFactory.CreateWebScraper(websites[i, 0]);
            website = new string[] { websites[i, 0], websites[i, 1] };
            ScrapePageRequest request = new ScrapePageRequest { Website = website, ShoeList = _scrapedShoes };
            websiteResponse = scraper.ScrapeWebpage(request);

            // Stop at the first failed site; its message is reported below.
            if (!websiteResponse.Success)
            {
                break;
            }
        }
        serviceResponse.Success = websiteResponse.Success;
        serviceResponse.Message = websiteResponse.Message;
        serviceResponse.ScrapeCount = _scrapedShoes.Count;
    }
    catch (Exception ex)
    {
        // Discard any partial results and surface the exception message.
        serviceResponse.Success = false;
        serviceResponse.Message = ex.Message;
        serviceResponse.ScrapeCount = 0;
        _scrapedShoes.Clear();
    }
    finally
    {
        // Always refresh the filtered view and notify listeners, even on failure.
        FilterShoes("");
        if (ScrapeCompletedEvent != null)
        {
            ScrapeCompletedEvent(this, new ScrapeResponseEventArgs(serviceResponse));
        }
    }
}
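A minimal sketch of how a caller might consume this service. The ScraperService type name, the delegate shape, and the Response accessor on ScrapeResponseEventArgs are assumptions; Scrape(), ScrapeCompletedEvent, and the event-args constructor come from the snippets in this section.

// Hypothetical consumer; ScraperService and e.Response are assumed names.
var service = new ScraperService();
service.ScrapeCompletedEvent += (sender, e) =>
{
    // e.Response is assumed to expose the ScrapeResponse passed to the
    // ScrapeResponseEventArgs constructor shown later in this section.
    Console.WriteLine($"Success: {e.Response.Success}, scraped: {e.Response.ScrapeCount}");
};
service.Scrape();   // fires ScrapeCompletedEvent from the finally block when done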
public void SerializesScrapeResponse()
{
    var lst = new List<ScrapeInfo>(1) { new ScrapeInfo(0x05, 0x07, 0x06) };
    var scrapeResponse = new ScrapeResponse(lst.AsReadOnly(), 0xfaff);

    var serialized = GetSerializer().Serialize(scrapeResponse);

    Assert.Equal(SharedData.ScrapeResponse, serialized);
}
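For reference, a sketch of the byte layout this test presumably asserts against, assuming the serializer below follows BEP 15 conventions: big-endian integers, a scrape action value of 2 (the tracker handler in the last snippet also dispatches scrape on action 2), and ScrapeInfo(seeders, completed, leechers) argument order.

// Expected buffer under those assumptions (20 bytes total):
//   offset 0..3   action = 2 (scrape)      -> 00 00 00 02
//   offset 4..7   transaction id = 0xfaff  -> 00 00 FA FF
//   offset 8..11  seeders   = 0x05         -> 00 00 00 05
//   offset 12..15 completed = 0x07         -> 00 00 00 07
//   offset 16..19 leechers  = 0x06         -> 00 00 00 06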
/// <inheritdoc/>
public byte[] Serialize(ScrapeResponse response)
{
    // BEP 15 scrape response: 4-byte action, 4-byte transaction id,
    // then 12 bytes (seeders, completed, leechers) per torrent.
    var bytes = new byte[8 + response.Info.Count * 12];

    var buf = _util.GetBytes((int)UdpActions.Scrape);
    Array.Copy(buf, 0, bytes, 0, buf.Length);

    buf = _util.GetBytes(response.TransactionId);
    Array.Copy(buf, 0, bytes, 4, buf.Length);

    for (var i = 0; i < response.Info.Count; i++)
    {
        buf = _util.GetBytes(response.Info[i].Seeders);
        Array.Copy(buf, 0, bytes, 8 + 12 * i, buf.Length);

        buf = _util.GetBytes(response.Info[i].Completed);
        Array.Copy(buf, 0, bytes, 12 + 12 * i, buf.Length);

        buf = _util.GetBytes(response.Info[i].Leechers);
        Array.Copy(buf, 0, bytes, 16 + 12 * i, buf.Length);
    }

    return bytes;
}
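The _util.GetBytes helper is not shown in the source; a minimal sketch of what it presumably does, assuming the tracker writes network byte order as BEP 15 requires:

// Hypothetical helper, assuming big-endian (network) byte order.
public byte[] GetBytes(int value)
{
    var buf = BitConverter.GetBytes(value);
    if (BitConverter.IsLittleEndian)
    {
        Array.Reverse(buf); // BitConverter is host-endian; flip to network order
    }
    return buf;
}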
private void UpdateOnDataLoaded(ScrapeResponse response)
{
    // Guard against view events re-entering while the display is rebuilt.
    _isRefreshingDisplay = true;
    _view.ClearFilteredDisplay();
    if (response.Success)
    {
        _view.DisplayFilteredPageCount(_service.FilteredPageCount);
        _view.ClearFilteredDisplay();
        _view.DisplayFilteredPage(_service.GetFilteredPage(0));
    }
    else
    {
        _view.DisplayFilteredPageCount(0);
        _view.Message = response.Message;
    }
    _isRefreshingDisplay = false;
}
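The presenter's view contract is not shown; from the calls above one can infer roughly this shape. All names other than the members actually invoked are assumptions, not confirmed by the source:

// Inferred view contract; member names come from the presenter above,
// but the interface itself and the page element type are assumptions.
public interface IScrapeView
{
    string Message { get; set; }
    void ClearFilteredDisplay();
    void DisplayFilteredPageCount(int pageCount);
    void DisplayFilteredPage(IEnumerable<Shoe> page);   // element type assumed
}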
public ScrapeResponseEventArgs(ScrapeResponse response)
{
    this.response = response;
}
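Only the constructor appears in the source; a plausible full shape of the class, assuming it follows the standard EventArgs pattern (the base class and the Response property are assumptions):

// Sketch of the surrounding class; only the constructor body is confirmed.
public class ScrapeResponseEventArgs : EventArgs
{
    private readonly ScrapeResponse response;

    public ScrapeResponseEventArgs(ScrapeResponse response)
    {
        this.response = response;
    }

    public ScrapeResponse Response => response;
}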
public async Task<IActionResult> Get([FromQuery] ScrapeRequest scrapeRequest)
{
    // Input validation has already been handled by model binding at this point.
    string keywords = scrapeRequest.Keywords;
    string matchingKeywords = scrapeRequest.MatchingKeywords;
    string baseUrl = "https://www.google.com.au/search";
    string unknownErrorMessage = "Result Div not found. Please contact the dev team to update this software.";
    try
    {
        // Build the final search URL.
        string finalUrl = _scrapingService.BuildGoogleSearchUrl(baseUrl, keywords, scrapeRequest.ResultNum);

        // Open the browser to get the full page source.
        _webDriver.Navigate().GoToUrl(finalUrl);
        string pageSource = _webDriver.PageSource;
        _webDriver.Quit();

        // Optional: download the HTML to a file.
        await _scrapingService.SaveToFileAsync(pageSource, keywords, DateTime.Today);
        logger.Here().Information("Saved page source to file successfully");

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(pageSource);

        var rsoDiv = htmlDocument.DocumentNode.Descendants("div")
            .Where(node => node.GetAttributeValue("id", "") == "rso").ToList();
        if (rsoDiv.Count == 0)
        {
            // TODO: this should be a 500 error
            return ErrorResponseBadRequest(logger.Here(), unknownErrorMessage, ErrorType.Unknown);
        }

        var searchResultComponentDivs = rsoDiv[0].Descendants("div")
            .Where(node => node.GetAttributeValue("class", "") == "bkWMgd")
            .Where(node => !node.Descendants("div").Any(childNode => childNode.GetAttributeValue("class", "") == "g kno-kp mnr-c g-blk"))
            .ToList();
        if (searchResultComponentDivs.Count == 0)
        {
            // TODO: this should be a 500 error
            return ErrorResponseBadRequest(logger.Here(), unknownErrorMessage, ErrorType.Unknown);
        }

        var searchResultDivs = new List<HtmlNode>();
        foreach (var searchResultComponentDiv in searchResultComponentDivs)
        {
            var searchResultDivList = searchResultComponentDiv.Descendants("div")
                .Where(node => node.GetAttributeValue("class", "") == "rc")
                .ToList();
            if (searchResultDivList.Count == 0)
            {
                // Video and image blocks still count as one result.
                searchResultDivs.Add(searchResultComponentDiv);
            }
            else
            {
                searchResultDivs.AddRange(searchResultDivList);
            }
        }

        List<GoogleSearchResult> googleSearchResults = new List<GoogleSearchResult>();
        for (int i = 0; i < searchResultDivs.Count; i++)
        {
            GoogleSearchResult googleSearchResult = new GoogleSearchResult { Links = new List<string>() };

            // Index (starts from 1).
            googleSearchResult.Index = i + 1;

            // Title.
            var titleDivNode = searchResultDivs[i].Descendants("h3")
                .FirstOrDefault(node => node.GetAttributeValue("class", "") == "LC20lb");
            if (titleDivNode != null)
            {
                googleSearchResult.Title = titleDivNode.InnerText;
            }

            // Links.
            var anchorNodes = searchResultDivs[i].Descendants("a")
                .Where(node => node.GetAttributeValue("href", "").StartsWith("http"))
                .ToList();
            foreach (var anchorNode in anchorNodes)
            {
                string link = anchorNode.GetAttributeValue("href", "");
                googleSearchResult.Links.Add(link);
            }

            // Description.
            var descriptionDivNode = searchResultDivs[i].Descendants("div")
                .FirstOrDefault(node => node.GetAttributeValue("class", "") == "s");
            var descriptionNode = descriptionDivNode?.Descendants("span")
                .FirstOrDefault(node => node.GetAttributeValue("class", "") == "st");
            if (descriptionNode != null)
            {
                googleSearchResult.Description = descriptionNode.InnerText;
            }

            googleSearchResults.Add(googleSearchResult);
        }

        List<GoogleSearchResult> matchingGoogleSearchResults = new List<GoogleSearchResult>();
        foreach (var googleSearchResult in googleSearchResults)
        {
            var links = googleSearchResult.Links;
            bool shouldContinue = false;
            foreach (var link in links)
            {
                if (link != null && link.ToLowerInvariant().Contains(matchingKeywords.ToLowerInvariant()))
                {
                    matchingGoogleSearchResults.Add(googleSearchResult);
                    // Break the inner loop, then continue with the next result.
                    shouldContinue = true;
                    break;
                }
            }
            if (shouldContinue)
            {
                continue;
            }
            if (googleSearchResult.Title != null && googleSearchResult.Title.ToLowerInvariant().Contains(matchingKeywords.ToLowerInvariant()))
            {
                matchingGoogleSearchResults.Add(googleSearchResult);
                continue;
            }
            if (googleSearchResult.Description != null && googleSearchResult.Description.ToLowerInvariant().Contains(matchingKeywords.ToLowerInvariant()))
            {
                matchingGoogleSearchResults.Add(googleSearchResult);
                continue;
            }
        }

        // Initialise the response.
        ScrapeResponse scrapeResponse = new ScrapeResponse
        {
            MatchingResults = matchingGoogleSearchResults,
            MatchingPositions = _scrapingService.GetPositionsFromMatchingResults(matchingGoogleSearchResults),
            Message = "Scraped search result successfully!"
        };
        return Ok(scrapeResponse);
    }
    catch (Exception ex)
    {
        // TODO: this should be a 500 error
        return ErrorResponseBadRequest(logger.Here(), ex.Message, ErrorType.ApiExpection);
    }
}
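The query-bound ScrapeRequest DTO (distinct from the UDP tracker's ScrapeRequest in the last snippet) is not shown; from its usage above it presumably looks something like this. The property names come from the controller; the types and validation attributes are assumptions:

using System.ComponentModel.DataAnnotations;

// Inferred request DTO; attributes and the ResultNum bound are assumed.
public class ScrapeRequest
{
    [Required]
    public string Keywords { get; set; }

    [Required]
    public string MatchingKeywords { get; set; }

    [Range(1, 100)]   // assumed bound
    public int ResultNum { get; set; }
}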
public void ReceivedData(UdpReceiveResult res, UdpClient client)
{
    var receivedData = res.Buffer;
    var endPointAddress = res.RemoteEndPoint.Address;
    var addressString = endPointAddress.ToString();
    if (receivedData.Length > 12)
    {
        // Bytes 8..11 hold the action: connect, announce, or scrape.
        var action = Unpack.UInt32(receivedData, 8);
        switch (action)
        {
            case 0:
                var connectRequest = new ConnectRequest(receivedData);
                Console.WriteLine("[Connect] from " + addressString + ":" + res.RemoteEndPoint.Port);
                var connectResponse = new ConnectResponse(0, connectRequest.TransactionID, 13376969L);
                SendDataAsync(client, connectResponse.Data, res.RemoteEndPoint);
                break;
            case 1:
                var announceRequest = new AnnounceRequest(receivedData);
                Console.WriteLine("[Announce] from " + addressString + ":" + announceRequest.Port + ", " + (TorrentEvent)announceRequest.TorrentEvent);
                var peer = new TorrentPeer(addressString, announceRequest.Port);

                // Track the peer unless it announced that it has stopped.
                if ((TorrentEvent)announceRequest.TorrentEvent != TorrentEvent.Stopped)
                {
                    RedisBacking.AddPeer(peer, announceRequest.InfoHash);
                }
                else
                {
                    RedisBacking.RemovePeer(peer, announceRequest.InfoHash);
                }

                var peers = RedisBacking.GetPeers(announceRequest.InfoHash);
                var torrentInfo = RedisBacking.ScrapeHashes(new List<byte[]>() { announceRequest.InfoHash });
                var announceResponse = new AnnounceResponse(announceRequest.TransactionID, AnnounceInterval, torrentInfo.First().Leechers, torrentInfo.First().Seeders, peers);
                SendDataAsync(client, announceResponse.Data, res.RemoteEndPoint);
                break;
            case 2:
                var scrapeRequest = new ScrapeRequest(receivedData);
                Console.WriteLine(string.Format("[Scrape] from {0} for {1} torrents", addressString, scrapeRequest.InfoHashes.Count));
                var scrapedTorrents = RedisBacking.ScrapeHashes(scrapeRequest.InfoHashes);
                var scrapeResponse = new ScrapeResponse(scrapeRequest.TransactionID, scrapedTorrents);
                SendDataAsync(client, scrapeResponse.Data, res.RemoteEndPoint);
                break;
            default:
                Console.WriteLine(Encoding.UTF8.GetString(receivedData));
                break;
        }
    }
    else
    {
        Console.WriteLine(Encoding.UTF8.GetString(receivedData));
    }
}
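The packets this handler parses follow BEP 15, the UDP tracker protocol. A minimal sketch of a client sending a connect request to such a tracker; the endpoint is a placeholder, everything else follows the spec:

using System;
using System.Buffers.Binary;
using System.Net.Sockets;

class ConnectProbe
{
    static void Main()
    {
        // BEP 15 connect request: 8-byte magic protocol id, 4-byte action,
        // 4-byte transaction id (16 bytes total, so Length > 12 above holds).
        var packet = new byte[16];
        BinaryPrimitives.WriteInt64BigEndian(packet, 0x41727101980);      // magic protocol id
        BinaryPrimitives.WriteInt32BigEndian(packet.AsSpan(8), 0);        // action 0 = connect
        BinaryPrimitives.WriteInt32BigEndian(packet.AsSpan(12), 0x1234);  // transaction id

        using var client = new UdpClient();
        client.Send(packet, packet.Length, "tracker.example.com", 6969);  // placeholder endpoint
        // A conforming tracker replies with 16 bytes: action, transaction id, connection id.
    }
}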