/// <summary>
/// Collects the movie ids that <c>movieIdExpr</c> matches inside a page fragment.
/// </summary>
/// <param name="pageElement">Page fragment handed to <c>WebPageParser.UniteParsedMultibleValues</c>.</param>
/// <returns>The matched values parsed as integers, with duplicates removed.</returns>
/// <exception cref="FormatException">A matched value is not a valid integer.</exception>
public static List<int> GetMoviesIds(string pageElement)
{
    var rawIds = WebPageParser.UniteParsedMultibleValues(pageElement, movieIdExpr);
    var uniqueIds = rawIds
        .Select(rawId => int.Parse(rawId))
        .Distinct();

    return uniqueIds.ToList();
}
/// <summary>
/// Searching the page behind <c>_url</c> for "cnn" must yield at least one node.
/// </summary>
public void NodesAreReturned()
{
    var parser = new WebPageParser(_url);

    parser.GetNodes("cnn").Should().NotBeEmpty();
}
/// <summary>
/// Searching the page behind <c>_url</c> for a nonsense phrase must yield no nodes.
/// </summary>
public void NoNodes()
{
    const string nonsensePhrase = "frisjdnviuejksmfnhks";
    var parser = new WebPageParser(_url);

    var matches = parser.GetNodes(nonsensePhrase);

    matches.Should().BeEmpty();
}
/// <summary>
/// The first node returned for a search must carry the search phrase and the
/// URL the parser was created with.
/// </summary>
public void NodesPropertiesAreAssigned()
{
    const string phrase = "cnn";
    var parser = new WebPageParser(_url);

    var firstNode = parser.GetNodes(phrase).First();

    firstNode.SearchPhrase.Should().Be(phrase);
    firstNode.Url.Should().Be(_url);
}
/// <summary>
/// Downloads the search result page for a movie title.
/// </summary>
/// <param name="title">Title appended to <c>searchRequestPrefix</c> to form the request.</param>
/// <returns>The downloaded search result page.</returns>
/// <exception cref="ArgumentException">
/// The page matches <c>movieNotFoundExpr</c>.
/// </exception>
public static async Task<string> GetMovieSearchPageAsync(string title)
{
    var request = searchRequestPrefix + title;
    var page = await WebPageParser.GetPageAsync(KinopoiskUri, request);

    // Only the success flag matters here; the captured values are ignored.
    List<string> notFoundMatches;
    var movieIsMissing = WebPageParser.TryParsePage(page, movieNotFoundExpr, out notFoundMatches);
    if (movieIsMissing)
    {
        throw new ArgumentException("Movie not found");
    }

    return page;
}
/// <summary>
/// Downloads a page and returns the first value that <paramref name="pageBlockExpr"/>
/// extracts from it.
/// </summary>
/// <param name="request">Request passed to <c>WebPageParser.GetPageAsync</c> together with <c>KinopoiskUri</c>.</param>
/// <param name="pageBlockExpr">Expression used to locate the block in the page.</param>
/// <param name="error">Message of the exception thrown when the block is not found.</param>
/// <returns>The first parsed value.</returns>
/// <exception cref="ArgumentException">The page could not be parsed with <paramref name="pageBlockExpr"/>.</exception>
private static string GetPageBlock(string request, Regex pageBlockExpr, string error = "Error while parsing page block")
{
    // NOTE(review): reading .Result waits synchronously for the download and
    // surfaces failures as AggregateException - confirm callers expect that.
    var download = WebPageParser.GetPageAsync(KinopoiskUri, request);
    var pageContent = download.Result;

    List<string> blocks;
    if (WebPageParser.TryParsePage(pageContent, pageBlockExpr, out blocks))
    {
        return blocks[0];
    }

    throw new ArgumentException(error);
}
/// <summary>
/// Requests the movie description from the API, giving up after
/// <c>millisecondsDelay</c> milliseconds.
/// </summary>
/// <param name="movieId">Id placed in the <c>id</c> query parameter of the request.</param>
/// <returns>
/// The parsed movie info, or an empty <see cref="MovieInfo"/> when the delay
/// finishes before the download does.
/// </returns>
public async Task<MovieInfo> GetMovieInfoAsync(int movieId)
{
    using (var timeoutCancellation = new CancellationTokenSource())
    {
        var request = $"/api/kinopoisk.json?id={movieId}&token={token}";
        var download = WebPageParser.GetPageAsync(KinopoiskApiUri, request);
        var timeout = Task.Delay(millisecondsDelay, timeoutCancellation.Token);

        var winner = await Task.WhenAny(download, timeout);
        if (winner == download)
        {
            // The download finished first, so the pending delay is no longer needed.
            timeoutCancellation.Cancel();

            var unescapedPage = Regex.Unescape(download.Result);
            return movieInfoParser.Parse(unescapedPage);
        }

        // The delay finished first: report an empty result.
        return new MovieInfo();
    }
}
/// <summary>
/// A page without anchors is parsed into a crawled, live <c>WebPage</c> with no links.
/// </summary>
public async Task CorrectlyParsesAPageWithNoLink()
{
    // Arrange: the REST client serves a document that contains no links.
    const string html = @"<html><body><h1>No Links!!</h1></body></html>";
    var client = Substitute.For<IRestClient>();
    client.GetAsync(callUri).Returns(html);
    var parser = new WebPageParser(client);

    // Act
    var page = await parser.ParsePage(callUri.OriginalString);

    // Assert
    Assert.That(page, Is.TypeOf<WebPage>());
    Assert.That(page.PageUrl.OriginalString, Is.EqualTo(absoluteUri));
    Assert.That(page.Links.Count, Is.EqualTo(0));
    Assert.That(page.WasCrawled, Is.True);
    Assert.That(page.DeadLink, Is.False);
    await client.Received(1).GetAsync(Arg.Any<Uri>());
}
/// <summary>
/// When the REST client throws, the page is still returned as crawled,
/// flagged as a dead link and carries no links.
/// </summary>
public async Task HandlesBadCalls()
{
    // Arrange: every request for the page fails.
    var client = Substitute.For<IRestClient>();
    client.GetAsync(callUri).Throws(new RestRequestFailedException());
    var parser = new WebPageParser(client);

    // Act
    var page = await parser.ParsePage(callUri.OriginalString);

    // Assert
    Assert.That(page, Is.TypeOf<WebPage>());
    Assert.That(page.PageUrl.OriginalString, Is.EqualTo(absoluteUri));
    Assert.That(page.Links.Count, Is.EqualTo(0));
    Assert.That(page.WasCrawled, Is.True);
    Assert.That(page.DeadLink, Is.True);
    await client.Received(1).GetAsync(Arg.Any<Uri>());
}
/// <summary>
/// A page with three anchors - a path, the same path with a fragment, and a bare
/// fragment - is expected to produce exactly two links.
/// </summary>
public async Task CorrectlyParsesAPageWithLinksToElements()
{
    // Arrange: the REST client serves three anchors.
    const string html = @"<a href=""testsite.com/path"">stuff</a><a href=""testsite.com/path#1"">stuff</a><a href=""#element"">stuff</a>";
    var client = Substitute.For<IRestClient>();
    client.GetAsync(callUri).Returns(html);
    var parser = new WebPageParser(client);

    // Act
    var page = await parser.ParsePage(callUri.OriginalString);

    // Assert
    Assert.That(page, Is.TypeOf<WebPage>());
    Assert.That(page.PageUrl.OriginalString, Is.EqualTo(absoluteUri));
    Assert.That(page.Links.Count, Is.EqualTo(2));
    Assert.That(page.WasCrawled, Is.True);
    Assert.That(page.DeadLink, Is.False);
    await client.Received(1).GetAsync(Arg.Any<Uri>());
}
/// <summary>
/// Constructing a parser for <c>_url</c> must produce a non-null instance.
/// </summary>
public void WebPageLoads()
{
    var parser = new WebPageParser(_url);

    parser.Should().NotBeNull();
}
/// <summary>
/// Imports a car from a mobile.de listing by parsing the listing page and
/// copying the values it finds into a new <see cref="MobileDeCar"/>.
/// </summary>
/// <param name="importMobileDe">Import request supplying the listing URL and the vehicle type id.</param>
/// <returns>
/// The populated car. Values whose marker is not found on the page keep their
/// defaults: Euro 1 exhaust type, petrol fuel type and "Not known" model and maker.
/// </returns>
/// <exception cref="FormatException">A numeric value extracted from the page is not a valid number.</exception>
/// <exception cref="OverflowException">A numeric value extracted from the page does not fit the target integer type.</exception>
public async Task<MobileDeCar> ImportCarFromMobileDe(ImportMobileDe importMobileDe)
{
    WebPageParser webPageParser = new WebPageParser();
    await webPageParser.ParseWebPage(importMobileDe.Url);

    MobileDeCar mobileDeCar = new MobileDeCar();

    // Default settings for the car.
    mobileDeCar.Url = importMobileDe.Url;
    mobileDeCar.DmvCalculation.VehicleTypeId = importMobileDe.VehicleTypeId;
    mobileDeCar.DmvCalculation.DateOfCalculation = DateTime.UtcNow;
    mobileDeCar.DmvCalculation.EngineTypeId = EngineTypeEnum.FourTactsRest;
    mobileDeCar.UserId = ServiceLocator.Instance.Resolve<IUserProvider>().GetCurrentUserId();

    string resultNode = null;
    string webPageNode = null;

    // CO2 emissions.
    webPageNode = webPageParser.GetWebPageNode("co2EmissionValue");
    if (webPageNode != null)
    {
        resultNode = webPageParser.GetWebPageNodeNumericContent(webPageNode);
        mobileDeCar.DmvCalculation.Co2EmissionsValue = Convert.ToInt16(resultNode);
    }

    // Engine power in kW.
    webPageNode = webPageParser.GetWebPageNode("kW (");
    if (webPageNode != null)
    {
        resultNode = webPageParser.GetWebPageNodeNumericContent(webPageNode);
        mobileDeCar.DmvCalculation.EnginePowerKw = Convert.ToInt16(resultNode);
    }

    // Euro exhaust class; falls back to Euro 1 when the page does not state it.
    webPageNode = webPageParser.GetWebPageNode("\nEuro");
    if (webPageNode != null)
    {
        resultNode = webPageParser.GetWebPageNodeStringContent(webPageNode);
        mobileDeCar.DmvCalculation.EuroExhaustTypeId = EnumHelper.GetEnumValue<EuroExhaustTypeEnum>(resultNode);
    }
    else
    {
        // Default EURO class.
        mobileDeCar.DmvCalculation.EuroExhaustTypeId = EuroExhaustTypeEnum.Euro1;
    }

    // By law every Euro 5+ car has to have a DPF filter, and most Euro 4 cars had one too.
    // NOTE(review): assumes the enum's numeric values match the Euro class numbers - confirm.
    if ((int)mobileDeCar.DmvCalculation.EuroExhaustTypeId < 4)
    {
        mobileDeCar.DmvCalculation.DieselParticlesAbove005Limit = true;
    }

    // Set the default and handle the fuel type. Petrol is the default; a diesel
    // marker, checked last, overrides it.
    mobileDeCar.DmvCalculation.FuelTypeId = FuelTypeEnum.PetrolRest;

    webPageNode = webPageParser.GetWebPageNode("p>\nPetrol");
    webPageNode = webPageNode ?? webPageParser.GetWebPageNode("p>\nBenzin");
    webPageNode = webPageNode ?? webPageParser.GetWebPageNode("p>\nHybrid (Benzin");
    if (webPageNode != null)
    {
        mobileDeCar.DmvCalculation.FuelTypeId = FuelTypeEnum.PetrolRest;
    }

    webPageNode = webPageParser.GetWebPageNode("p>\nDiesel");
    webPageNode = webPageNode ?? webPageParser.GetWebPageNode("p>\nHybrid (Diesel");
    if (webPageNode != null)
    {
        mobileDeCar.DmvCalculation.FuelTypeId = FuelTypeEnum.Diesel;
    }

    // Engine displacement in cubic centimetres.
    webPageNode = webPageParser.GetWebPageNode(" cm³");
    if (webPageNode != null)
    {
        resultNode = webPageParser.GetWebPageNodeNumericContent(webPageNode);
        mobileDeCar.DmvCalculation.EngineDisplacementCcm = Convert.ToInt32(resultNode);
    }

    // Gross price.
    webPageNode = webPageParser.GetWebPageNode("pricePrimaryCountryOfSale priceGross");
    if (webPageNode != null)
    {
        resultNode = webPageParser.GetWebPageNodeNumericContent(webPageNode);
        mobileDeCar.DmvCalculation.VehicleValue = Convert.ToInt32(resultNode);
    }

    // Model and maker come from the page title. The defaults are assigned before
    // the lookup so they remain in place when the title is missing or empty.
    mobileDeCar.Model = "Not known";
    mobileDeCar.Maker = "Not known";
    webPageNode = webPageParser.GetWebPageNode("h1>\n");
    if (webPageNode != null)
    {
        resultNode = webPageParser.GetWebPageNodeStringContent(webPageNode);
        if (!string.IsNullOrWhiteSpace(resultNode))
        {
            mobileDeCar.Model = resultNode;
            // The maker is taken to be the first space-separated word of the title.
            mobileDeCar.Maker = resultNode.Split(' ').FirstOrDefault();
        }
    }

    // Main listing image.
    webPageNode = webPageParser.GetWebPageNode("img class=\"currentImage\" src");
    if (webPageNode != null)
    {
        resultNode = webPageParser.GetWebPageAttributeStringContent(webPageNode, "src");
        mobileDeCar.ImageUrl = resultNode;
    }

    return mobileDeCar;
}