Example #1
        public void Scrape()
        {
            _scrapedShoes.Clear();

            ScrapeResponse serviceResponse = new ScrapeResponse();

            try
            {
                string[,] websites = WebScraperFactory.Websites;
                WebScraper         scraper;
                ScrapePageResponse websiteResponse = new ScrapePageResponse();
                string[]           website;

                for (int i = 0; i < WebScraperFactory.WebsiteCount; ++i)
                {
                    scraper = WebScraperFactory.CreateWebScraper(websites[i, 0]);
                    website = new string[] { websites[i, 0], websites[i, 1] };

                    ScrapePageRequest request = new ScrapePageRequest
                    {
                        Website  = website,
                        ShoeList = _scrapedShoes
                    };

                    websiteResponse = scraper.ScrapeWebpage(request);

                    if (!websiteResponse.Success)
                    {
                        break;
                    }
                }

                serviceResponse.Success     = websiteResponse.Success;
                serviceResponse.Message     = websiteResponse.Message;
                serviceResponse.ScrapeCount = _scrapedShoes.Count;
            }
            catch (Exception ex)
            {
                serviceResponse.Success     = false;
                serviceResponse.Message     = ex.Message;
                serviceResponse.ScrapeCount = 0;
                _scrapedShoes.Clear();
            }
            finally
            {
                FilterShoes("");

                if (ScrapeCompletedEvent != null)
                {
                    ScrapeCompletedEvent(this, new ScrapeResponseEventArgs(serviceResponse));
                }
            }
        }
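
The Scrape method reports its outcome through ScrapeCompletedEvent rather than a return value, so callers subscribe before starting the scrape. A minimal subscriber sketch, assuming the event follows the usual (sender, args) delegate shape seen in the invocation above and that ScrapeResponseEventArgs exposes its stored response through a Response property (an assumption; only the constructor appears in Example #5 below):

        // Sketch only: "scrapeService" is a hypothetical instance of the class that
        // defines Scrape(), and e.Response is an assumed property on ScrapeResponseEventArgs.
        scrapeService.ScrapeCompletedEvent += (sender, e) =>
        {
            ScrapeResponse response = e.Response;
            Console.WriteLine(response.Success
                ? $"Scraped {response.ScrapeCount} shoes."
                : $"Scrape failed: {response.Message}");
        };
        scrapeService.Scrape();
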
        public void SerializesScrapeResponse()
        {
            var lst = new List <ScrapeInfo>(1)
            {
                new ScrapeInfo(0x05, 0x07, 0x06)
            };

            var scrapeResponse = new ScrapeResponse(lst.AsReadOnly(), 0xfaff);
            var serialized     = GetSerializer().Serialize(scrapeResponse);

            Assert.Equal(SharedData.ScrapeResponse, serialized);
        }
        /// <inheritdoc/>
        public byte[] Serialize(ScrapeResponse response)
        {
            var bytes = new byte[8 + response.Info.Count * 12];
            var buf   = _util.GetBytes((int)UdpActions.Scrape);

            Array.Copy(buf, 0, bytes, 0, buf.Length);
            buf = _util.GetBytes(response.TransactionId);
            Array.Copy(buf, 0, bytes, 4, buf.Length);
            for (var i = 0; i < response.Info.Count; i++)
            {
                buf = _util.GetBytes(response.Info[i].Seeders);
                Array.Copy(buf, 0, bytes, 8 + 12 * i, buf.Length);
                buf = _util.GetBytes(response.Info[i].Completed);
                Array.Copy(buf, 0, bytes, 12 + 12 * i, buf.Length);
                buf = _util.GetBytes(response.Info[i].Leechers);
                Array.Copy(buf, 0, bytes, 16 + 12 * i, buf.Length);
            }
            return(bytes);
        }
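
The serializer above lays out the UDP tracker scrape response as a 4-byte action code, a 4-byte transaction id, and then 12 bytes per torrent (seeders, completed, leechers), matching BEP 15. A minimal sketch of the inverse operation, assuming _util.GetBytes writes the same byte order that BitConverter reads and that the ScrapeInfo and ScrapeResponse constructors take the same arguments as in the test above:

        // Sketch only: field types are assumed to be uint; the constructor argument
        // order (seeders, completed, leechers) mirrors the write order above.
        public ScrapeResponse Deserialize(byte[] bytes)
        {
            var transactionId = BitConverter.ToUInt32(bytes, 4); // bytes 0..3 hold the action code
            var count         = (bytes.Length - 8) / 12;         // 12 bytes per torrent
            var info          = new List<ScrapeInfo>(count);

            for (var i = 0; i < count; i++)
            {
                var seeders   = BitConverter.ToUInt32(bytes, 8 + 12 * i);
                var completed = BitConverter.ToUInt32(bytes, 12 + 12 * i);
                var leechers  = BitConverter.ToUInt32(bytes, 16 + 12 * i);
                info.Add(new ScrapeInfo(seeders, completed, leechers));
            }

            return new ScrapeResponse(info.AsReadOnly(), transactionId);
        }
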
Example #4
        private void updateOnDataLoaded(ScrapeResponse response)
        {
            _isRefreshingDisplay = true;

            _view.ClearFilteredDisplay();

            if (response.Success)
            {
                _view.DisplayFilteredPageCount(_service.FilteredPageCount);
                _view.ClearFilteredDisplay();
                _view.DisplayFilteredPage(_service.GetFilteredPage(0));
            }
            else
            {
                _view.DisplayFilteredPageCount(0);
                _view.Message = response.Message;
            }

            _isRefreshingDisplay = false;
        }
Example #5
        public ScrapeResponseEventArgs(ScrapeResponse response)
        {
            this.response = response;
        }
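
Only the constructor is listed here; a sketch of how the surrounding event-args class might look, assuming it follows the conventional EventArgs pattern (the Response property is an assumption and is not shown in the source):

        // Sketch of the enclosing type; only the constructor and the response field
        // appear in the example above, the rest is an assumed but conventional shape.
        public class ScrapeResponseEventArgs : EventArgs
        {
            private readonly ScrapeResponse response;

            public ScrapeResponseEventArgs(ScrapeResponse response)
            {
                this.response = response;
            }

            public ScrapeResponse Response => response;
        }
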
        public async Task <IActionResult> Get([FromQuery] ScrapeRequest scrapeRequest)
        {
            //input validation has already been handled here

            string keywords            = scrapeRequest.Keywords;
            string matchingKeywords    = scrapeRequest.MatchingKeywords;
            string baseUrl             = "https://www.google.com.au/search";
            string unknownErrorMessage = "Result Div not found. Please contact the dev team to update this software.";

            try
            {
                //build final search url
                string finalUrl = _scrapingService.BuildGoogleSearchUrl(baseUrl, keywords, scrapeRequest.ResultNum);

                //open browser to get full page source
                _webDriver.Navigate().GoToUrl(finalUrl);
                string pageSource = _webDriver.PageSource;
                _webDriver.Quit();
                //finalUrl = "https://www.ebay.com";
                //optional: download html to a file
                await _scrapingService.SaveToFileAsync(pageSource, keywords, DateTime.Today);

                logger.Here().Information("Saved page source to file successfully");

                var htmlDocument = new HtmlDocument();
                htmlDocument.LoadHtml(pageSource);

                var rsoDiv = htmlDocument.DocumentNode.Descendants("div")
                             .Where(node => node.GetAttributeValue("id", "") == "rso").ToList();

                if (rsoDiv.Count == 0)
                {
                    //TODO: this should be 500 error
                    return(ErrorResponseBadRequest(logger.Here(), unknownErrorMessage, ErrorType.Unknown));
                }

                var searchResultComponentDivs = rsoDiv[0].Descendants("div")
                                                .Where(node => node.GetAttributeValue("class", "") == "bkWMgd")
                                                .Where(node => !node.Descendants("div").Any(childNode => childNode.GetAttributeValue("class", "") == "g kno-kp mnr-c g-blk"))
                                                .ToList();
                if (searchResultComponentDivs.Count == 0)
                {
                    //TODO: this should be 500 error
                    return(ErrorResponseBadRequest(logger.Here(), unknownErrorMessage, ErrorType.Unknown));
                }
                var searchResultDivs = new List <HtmlNode>();
                foreach (var searchResultComponentDiv in searchResultComponentDivs)
                {
                    var searchResultDivList = searchResultComponentDiv.Descendants("div")
                                              .Where(node => node.GetAttributeValue("class", "") == "rc")
                                              .ToList();
                    if (searchResultDivList.Count == 0)
                    {
                        searchResultDivs.Add(searchResultComponentDiv);  //video and image blocks still count as one result
                    }
                    else
                    {
                        searchResultDivs.AddRange(searchResultDivList);
                    }
                }
                List <GoogleSearchResult> googleSearchResults = new List <GoogleSearchResult>();

                for (int i = 0; i < searchResultDivs.Count; i++)
                {
                    GoogleSearchResult googleSearchResult = new GoogleSearchResult
                    {
                        Links = new List <string>()
                    };
                    //index
                    googleSearchResult.Index = i + 1;  //start from 1
                    //titles
                    var titleDivNode = searchResultDivs[i].Descendants("h3")
                                       .FirstOrDefault(node => node.GetAttributeValue("class", "") == "LC20lb");

                    if (titleDivNode != null)
                    {
                        googleSearchResult.Title = titleDivNode.InnerText;
                    }

                    //links
                    var anchorNodes = searchResultDivs[i].Descendants("a")
                                      .Where(node => node.GetAttributeValue("href", "").StartsWith("http"))
                                      .ToList();

                    foreach (var anchornode in anchorNodes)
                    {
                        string link = anchornode.GetAttributeValue("href", "");
                        googleSearchResult.Links.Add(link);
                    }

                    //description
                    var descriptionDivNode = searchResultDivs[i].Descendants("div")
                                             .FirstOrDefault(node => node.GetAttributeValue("class", "") == "s");
                    var descriptionNode = descriptionDivNode?.Descendants("span")
                                          .FirstOrDefault(node => node.GetAttributeValue("class", "") == "st");
                    if (descriptionNode != null)
                    {
                        googleSearchResult.Description = descriptionNode.InnerText;
                    }
                    googleSearchResults.Add(googleSearchResult);
                }

                List <GoogleSearchResult> matchingGoogleSearchResults = new List <GoogleSearchResult>();
                foreach (var googleSearchResult in googleSearchResults)
                {
                    var  links          = googleSearchResult.Links;
                    bool shouldContinue = false;
                    foreach (var link in links)
                    {
                        if (link != null && link.ToLowerInvariant().Contains(matchingKeywords.ToLowerInvariant()))
                        {
                            matchingGoogleSearchResults.Add(googleSearchResult);
                            //break out of the inner loop, then continue with the next result
                            shouldContinue = true;
                            break;
                        }
                    }
                    if (shouldContinue)
                    {
                        continue;
                    }
                    if (googleSearchResult.Title != null && googleSearchResult.Title.ToLowerInvariant().Contains(matchingKeywords.ToLowerInvariant()))
                    {
                        matchingGoogleSearchResults.Add(googleSearchResult);
                        continue;
                    }
                    if (googleSearchResult.Description != null && googleSearchResult.Description.ToLowerInvariant().Contains(matchingKeywords.ToLowerInvariant()))
                    {
                        matchingGoogleSearchResults.Add(googleSearchResult);
                        continue;
                    }
                }

                //init response
                ScrapeResponse scrapeResponse = new ScrapeResponse
                {
                    MatchingResults   = matchingGoogleSearchResults,
                    MatchingPositions = _scrapingService.GetPositionsFromMatchingResults(matchingGoogleSearchResults),
                    Message           = "Scraped search result successfully!"
                };
                return(Ok(scrapeResponse));
            }
            catch (Exception ex)
            {
                //TODO: this should be 500 error
                return(ErrorResponseBadRequest(logger.Here(), ex.Message, ErrorType.ApiExpection));
            }
        }
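
A sketch of calling the action above over HTTP; the route and host are assumptions (the controller's route attribute is not shown), while the query parameters mirror the ScrapeRequest properties the action reads (Keywords, MatchingKeywords, ResultNum):

        // Sketch only: "api/scrape" and the localhost port are hypothetical values.
        public async Task<string> QueryScraperAsync()
        {
            using var client = new HttpClient { BaseAddress = new Uri("https://localhost:5001/") };
            var url = "api/scrape?Keywords=shoes&MatchingKeywords=example.com&ResultNum=10";
            // On success the body is the serialized ScrapeResponse
            // (MatchingResults, MatchingPositions, Message).
            return await client.GetStringAsync(url);
        }
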
Example #7
        public void ReceivedData(UdpReceiveResult res, UdpClient client)
        {
            var receivedData    = res.Buffer;
            var endPointAddress = res.RemoteEndPoint.Address;
            var addressString   = endPointAddress.ToString();

            if (receivedData.Length > 12)
            {
                var action = Unpack.UInt32(receivedData, 8); //connect,announce,scrape
                switch (action)
                {
                case 0:
                    var connectRequest = new ConnectRequest(receivedData);
                    Console.WriteLine("[Connect] from " + addressString + ":" + res.RemoteEndPoint.Port);

                    var connectResponse = new ConnectResponse(0, connectRequest.TransactionID, (long)13376969);
                    SendDataAsync(client, connectResponse.Data, res.RemoteEndPoint);
                    break;


                case 1:
                    var announceRequest = new AnnounceRequest(receivedData);
                    Console.WriteLine("[Announce] from " + addressString + ":" + announceRequest.Port + ", " + (TorrentEvent)announceRequest.TorrentEvent);

                    var peer = new TorrentPeer(addressString, announceRequest.Port);

                    if ((TorrentEvent)announceRequest.TorrentEvent != TorrentEvent.Stopped)
                    {
                        RedisBacking.AddPeer(peer, announceRequest.InfoHash);
                    }
                    else
                    {
                        RedisBacking.RemovePeer(peer, announceRequest.InfoHash);
                    }

                    var peers       = RedisBacking.GetPeers(announceRequest.InfoHash);
                    var torrentInfo = RedisBacking.ScrapeHashes(new List <byte[]>()
                    {
                        announceRequest.InfoHash
                    });
                    var announceResponse = new AnnounceResponse(announceRequest.TransactionID, AnnounceInterval, torrentInfo.First().Leechers, torrentInfo.First().Seeders, peers);
                    SendDataAsync(client, announceResponse.Data, res.RemoteEndPoint);
                    break;


                case 2:
                    var scrapeRequest = new ScrapeRequest(receivedData);
                    Console.WriteLine(string.Format("[Scrape] from {0} for {1} torrents", addressString, scrapeRequest.InfoHashes.Count));

                    var scrapedTorrents = RedisBacking.ScrapeHashes(scrapeRequest.InfoHashes);
                    var scrapeResponse  = new ScrapeResponse(scrapeRequest.TransactionID, scrapedTorrents);

                    SendDataAsync(client, scrapeResponse.Data, res.RemoteEndPoint);

                    break;

                default:
                    Console.WriteLine(Encoding.UTF8.GetString(receivedData));
                    break;
                }
            }
            else
            {
                Console.WriteLine(Encoding.UTF8.GetString(receivedData));
            }
        }
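
A sketch of the receive loop that would feed ReceivedData; UdpClient.ReceiveAsync is the standard .NET API, while the port number and the endless-loop hosting model are assumptions about how this tracker is run:

        // Sketch only: binds a UDP socket and dispatches every datagram to ReceivedData,
        // which handles the connect/announce/scrape actions shown above.
        public async Task ListenAsync(int port)
        {
            using var client = new UdpClient(port);
            while (true)
            {
                UdpReceiveResult result = await client.ReceiveAsync();
                ReceivedData(result, client);
            }
        }
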