/// <summary>
/// Builds a render tree from the "inherit styles with parent" fixture page and checks
/// which CSS rule each nested layout carries, plus two colour values on the innermost one.
/// </summary>
public void BuildRender_InheritStylesWithParent_RenderLayout()
{
    // Arrange
    var tree = new RenderTree();
    var htmlLoader = new HtmlLoader();
    var window = htmlLoader.LoadFromFile("Contents/test_html_inherit_styles_with_parent.html");

    // Act
    tree.Build(window);

    // Walk down: root -> first child -> last child -> first child (the main div).
    var htmlLayout = tree.LayoutRoot.Layouts.FirstOrDefault();
    var bodyLayout = htmlLayout.Layouts.LastOrDefault();
    var mainDiv = bodyLayout.Layouts.FirstOrDefault();

    // Assert
    mainDiv.CssRule.Should().NotBeNull();

    var firstChild = mainDiv.Layouts.FirstOrDefault();
    firstChild.CssRule.SelectorText.Should().BeEquivalentTo(".oneLevel");

    var lastChild = mainDiv.Layouts.LastOrDefault();
    lastChild.CssRule.SelectorText.Should().BeEquivalentTo(".twoLevel");

    var innermost = lastChild.Layouts.FirstOrDefault();
    innermost.CssRule.SelectorText.Should().BeEquivalentTo(".twoLevel");

    // The innermost element must share the main div's text colour.
    var innerStyle = innermost.CssRule.Style;
    var outerStyle = mainDiv.CssRule.Style;
    innerStyle.Color.Should().BeEquivalentTo(outerStyle.Color);
    innerStyle.Background.Color.Should().BeEquivalentTo(new CSSColor(17, 17, 17));
}
/// <summary>
/// Loads and parses every page id from <c>parserSettings.StartPoint</c> through
/// <c>parserSettings.EndPoint</c> (inclusive), raising <c>OnParseOneLink</c> after each page
/// and <c>ParseComplete</c> at the end. Returns early, without raising
/// <c>ParseComplete</c>, as soon as <c>Program.ParserActive</c> is false.
/// </summary>
private async void Worker()
{
    loader = new HtmlLoader(parserSettings);

    int pageId = parserSettings.StartPoint;
    while (pageId <= parserSettings.EndPoint)
    {
        // Checked before every page so the run can be stopped between pages.
        if (!Program.ParserActive)
        {
            return;
        }

        var pageSource = await loader.GetSourceByPageId(pageId);
        var htmlParser = new HtmlParser();
        var parsedDocument = await htmlParser.ParseDocumentAsync(pageSource);
        Parser.Parse(parsedDocument);

        // The per-page notification is raised while holding the shared lock object.
        lock (locker)
        {
            OnParseOneLink?.Invoke();
        }

        pageId++;
    }

    if (!Program.ParserActive)
    {
        return;
    }

    ParseComplete?.Invoke();
}
/// <summary>
/// Creates the service around the given parser and sets up its loader and empty collections.
/// </summary>
/// <param name="parser">Parser producing a sequence of strings; stored in the Parser member.</param>
public SitemapService(IParser<IEnumerable<string>> parser)
{
    // Collaborators.
    Parser = parser;
    loader = new HtmlLoader();

    // Working state starts out empty.
    Urls = new HashSet<string>();
    Measurements = new Dictionary<string, List<double>>();
}
/// <summary>
/// Downloads page 1 of the hard-coded category URL, extracts article headlines and image
/// sources with two CSS selectors, pairs them by position and prints each pair to the console.
/// </summary>
/// <remarks>
/// Pairs are printed straight from the zipped sequence. Building a dictionary keyed by
/// headline would throw <see cref="ArgumentException"/> as soon as two articles shared a
/// headline (and <see cref="ArgumentNullException"/> for a null headline), aborting before
/// any output was produced.
/// </remarks>
public async void Work()
{
    const string url = "https://citydog.by/allposts/category/people/";

    var loader = new HtmlLoader(url);
    var source = await loader.GetSourceByPageId(1);

    var parser = new HtmlParser();
    var document = parser.Parse(source);

    var headlines = document
        .QuerySelectorAll("div > div.headingArticle > h2 > a")
        .Select(item => item.TextContent)
        .ToList();
    var imageSources = document
        .QuerySelectorAll("div > div.imageWrapper > a > img")
        .Select(item => item.GetAttribute("src"))
        .ToList();

    // Zip stops at the shorter list, so unmatched trailing items are ignored.
    var articles = headlines.Zip(imageSources, (headline, image) => new { headline, image });

    foreach (var article in articles)
    {
        Console.WriteLine($"Заголовок: {article.headline}");
        Console.WriteLine($"Ссылка на изображение: {article.image}");
        Console.WriteLine("-----------------------------------------------------------");
    }
}
/// <summary>
/// Reads all HTML text from <paramref name="reader"/>, parses it into an HTML document and
/// loads that into <paramref name="document"/>.
/// </summary>
/// <param name="reader">Source of the HTML text; read to the end.</param>
/// <param name="document">Target document that receives the loaded content.</param>
/// <param name="baseUrl">Base URL for the parsed HTML; applied only when non-null and non-empty.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="reader"/> or <paramref name="document"/> is null.
/// </exception>
public static void LoadHtmlFile(
    System.IO.TextReader reader,
    DomDocument document,
    string baseUrl)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    if (document == null)
    {
        throw new ArgumentNullException("document");
    }

    HTMLDocument parsedHtml = new HTMLDocument();
    parsedHtml.LoadHTML(reader.ReadToEnd());

    // An empty or missing base URL leaves the parsed document's own value untouched.
    if (!string.IsNullOrEmpty(baseUrl))
    {
        parsedHtml.BaseURL = baseUrl;
    }

    HtmlLoader htmlLoader = new HtmlLoader();
    htmlLoader.Load(parsedHtml, document);

    document.AfterLoad(FileFormat.Html);
}
/// <summary>
/// Parses the local catalog fixture and compares the resulting authors, courses and
/// categories with the expected dictionaries.
/// </summary>
public void Should_ParseCatalogContent()
{
    // Arrange
    const string host = "http://www.pluralsight.com/";
    const string catalogPath = "./../../TestData/PluralsighCatalogTestData.html";

    var nodeSelector = new PluralsightNodeSelector();
    var nodeParser = new PluralsightNodeParser(host, nodeSelector);
    var sut = new PluralsightCatalogParser(nodeSelector, nodeParser);

    // Point every expected course back at the category that lists it.
    foreach (var category in ExpectedCategoriesDictionary.Values)
    {
        foreach (var course in category.Courses)
        {
            course.Category = category;
        }
    }

    var htmlLoader = new HtmlLoader<HtmlAgilityPackHtmlContainer>();
    var catalog = htmlLoader.Load(catalogPath, LocationType.Local);

    // Act
    var result = sut.Parse(catalog);

    // Assert: authors (only those with a URL name, compared in URL-name order)
    var actualAuthors = result.AuthorsParseResult.AuthorsExceptWhoseUrlNullContainer.Values
        .OrderBy(x => x.UrlName);
    var expectedAuthors = ExpectedAuthorsDictionary.Values
        .Where(x => x.UrlName != null)
        .OrderBy(x => x.UrlName);

    Assert.Equal(expectedAuthors, actualAuthors, PluralsightAuthor.PropertiesComparer);

    // Assert: courses
    var actualCourses = result.CoursesParseResult.CourseContainer.Values;
    var expectedCourses = ExpectedCoursesDictionary.Values;

    CheckCoursesEquality(expectedCourses, actualCourses);

    // Assert: categories (matched by URL name, each with its own course list)
    var actualCategories = result.CategoriesParseResult.CategoryContainer.Values;
    var expectedCategories = ExpectedCategoriesDictionary.Values;

    Assert.Equal(expectedCategories.Count, actualCategories.Count);

    foreach (var actualCategory in actualCategories)
    {
        var matchingExpected = expectedCategories
            .Single(x => x.UrlName == actualCategory.UrlName);

        Assert.Equal(matchingExpected, actualCategory, PluralsightCategory.PropertiesComparer);
        CheckCoursesEquality(matchingExpected.Courses, actualCategory.Courses, actualCategory);
    }
}
/// <summary>
/// Creates the presenter's loader, parser and word detector, and subscribes to the
/// detector's Finished event.
/// </summary>
public Presenter() // do not forget to initialize everything here
{
    loader = new HtmlLoader();
    parser = new Parser();

    // The detector reports completion through Handler.
    detector = new WordDetector();
    detector.Finished += Handler;
}
/// <summary>
/// Builds an <c>HtmlLoader</c> backed by a template provider that in turn wraps a new
/// database provider.
/// </summary>
/// <returns>The newly created loader.</returns>
public static HtmlLoader BuildHtmlLoader()
{
    // Dependencies are created innermost-first: database -> templates -> loader.
    return new HtmlLoader(
        new TemplateProvider(
            new DatabaseProvider()));
}
/// <summary>
/// Tries to load the agent source with the flag set to <c>true</c>; if that attempt reports
/// failure, tries once more with <c>false</c>. The outcome of the second attempt is not used.
/// </summary>
public async Task InitAsync()
{
    // presumably true = offline source and false = online source; confirm against TryLoadAgentSource
    var firstAttemptOk = await HtmlLoader.TryLoadAgentSource(true);
    if (firstAttemptOk)
    {
        return;
    }

    await HtmlLoader.TryLoadAgentSource(false);
}
/// <summary>
/// Stores the supplied dependencies and creates the handler's own SHA-256 algorithm
/// instance and HTML loader.
/// </summary>
/// <param name="settings">Browser settings kept by the handler.</param>
/// <param name="filter">Request filter kept by the handler.</param>
/// <param name="logger">Logger kept by the handler.</param>
/// <param name="text">Text provider; kept and also passed to the HTML loader.</param>
internal ResourceHandler(BrowserSettings settings, IRequestFilter filter, ILogger logger, IText text)
{
    // Dependencies handed in by the caller.
    this.settings = settings;
    this.filter = filter;
    this.logger = logger;
    this.text = text;

    // Helpers owned by this instance.
    this.algorithm = new SHA256Managed();
    this.htmlLoader = new HtmlLoader(text);
}
/// <summary>
/// Verifies that the factory's Create method returns the very loader instance its
/// creation delegate supplies.
/// </summary>
public void CreatesHtmlLoader()
{
    // Arrange
    var wrapperFactory = Mock.Of<IHtmlWrapperFactory>();
    var webClient = Mock.Of<IWebClientWrapper>();
    var expected = new HtmlLoader(wrapperFactory, webClient);
    var factory = CreateInstance(() => expected);

    // Act
    var actual = factory.Create();

    // Assert
    actual.ShouldBeSameAs(expected);
}
/// <summary>
/// Builds a render tree from the absolute-position compositor fixture and walks down to
/// the last child of the root's first layout.
/// </summary>
public void RenderLayoutCompositor_AbsolutePosition_ReturnOffsetPosition()
{
    // Arrange
    var tree = new RenderTree();
    var htmlLoader = new HtmlLoader();
    var window = htmlLoader.LoadFromFile("Contents/test_compositor_absolute_position.html");

    // Act
    tree.Build(window);

    var htmlLayout = tree.LayoutRoot.Layouts.FirstOrDefault();
    var bodyLayout = htmlLayout.Layouts.LastOrDefault();

    // NOTE(review): nothing is asserted yet; the test only fails if building or walking
    // the tree throws. The offset position named in the test title is never checked.
}
/// <summary>
/// Loads the Twitter page for <paramref name="username"/> through a proxy obtained from
/// <c>GetRandomProxy</c> and extracts the user model from it.
/// </summary>
/// <param name="username">Account name inserted into the profile URL.</param>
/// <returns>The parsed user, or null when the page could not be loaded or parsed.</returns>
public async Task<TwitterUserModel> GetUserProfileAsync(string username)
{
    var page = await HtmlLoader.TryLoadAndParsePageAsync($"https://twitter.com/{username}?lang=en", GetRandomProxy());
    if (page == null)
    {
        return null;
    }

    if (!HtmlExtracter.TryParseUser(page, out var profile))
    {
        return null;
    }

    return profile;
}
/// <summary>
/// Fetches the source for page <paramref name="i"/> of <paramref name="link"/> and parses
/// it into an HTML document.
/// </summary>
/// <param name="i">Page index passed to the loader.</param>
/// <param name="link">Link passed to the loader.</param>
/// <returns>The parsed document, or null when the loader returned the string "404".</returns>
public static async Task<AngleSharp.Dom.Html.IHtmlDocument> GetDocument(int i, string link)
{
    var htmlLoader = new HtmlLoader();

    // The interpolation is kept on purpose: it turns a null link into an empty string.
    var markup = await htmlLoader.GetSource(i, $"{link}");
    if (markup == "404")
    {
        return null;
    }

    var htmlParser = new HtmlParser();
    return await htmlParser.ParseAsync(markup);
}
/// <summary>
/// Creates the handlers the browser control depends on, subscribes this instance to their
/// events, then constructs the control, subscribes to its events and initializes it.
/// </summary>
private void InitializeControl()
{
    // Collaborators for the control; loggers are cloned with a per-instance label.
    var contextMenuHandler = new ContextMenuHandler();
    var browserControlLogger = logger.CloneFor($"{nameof(BrowserControl)} #{Id}");
    var dialogHandler = new DialogHandler();
    var displayHandler = new DisplayHandler();
    var downloadHandlerLogger = logger.CloneFor($"{nameof(DownloadHandler)} #{Id}");
    var downloadHandler = new DownloadHandler(appConfig, settings, downloadHandlerLogger);
    var htmlLoader = new HtmlLoader(text);
    var keyboardHandler = new KeyboardHandler();
    var lifeSpanHandler = new LifeSpanHandler();
    var requestFilter = new RequestFilter();
    var requestHandlerLogger = logger.CloneFor($"{nameof(RequestHandler)} #{Id}");
    var requestHandler = new RequestHandler(appConfig, requestFilter, requestHandlerLogger, settings, text);

    Icon = new BrowserIconResource();

    // Dialog and display events.
    dialogHandler.DialogRequested += DialogHandler_DialogRequested;
    displayHandler.FaviconChanged += DisplayHandler_FaviconChanged;
    displayHandler.ProgressChanged += DisplayHandler_ProgressChanged;

    // Download events.
    downloadHandler.ConfigurationDownloadRequested += DownloadHandler_ConfigurationDownloadRequested;
    downloadHandler.DownloadUpdated += DownloadHandler_DownloadUpdated;

    // Keyboard shortcuts.
    keyboardHandler.ReloadRequested += ReloadRequested;
    keyboardHandler.ZoomInRequested += ZoomInRequested;
    keyboardHandler.ZoomOutRequested += ZoomOutRequested;
    keyboardHandler.ZoomResetRequested += ZoomResetRequested;

    // Popup and request events.
    lifeSpanHandler.PopupRequested += LifeSpanHandler_PopupRequested;
    requestHandler.QuitUrlVisited += RequestHandler_QuitUrlVisited;
    requestHandler.RequestBlocked += RequestHandler_RequestBlocked;

    InitializeRequestFilter(requestFilter);

    control = new BrowserControl(
        contextMenuHandler,
        dialogHandler,
        displayHandler,
        downloadHandler,
        htmlLoader,
        keyboardHandler,
        lifeSpanHandler,
        browserControlLogger,
        requestHandler,
        startUrl);

    control.AddressChanged += Control_AddressChanged;
    control.LoadingStateChanged += Control_LoadingStateChanged;
    control.TitleChanged += Control_TitleChanged;

    control.Initialize();

    logger.Debug("Initialized browser control.");
}
/// <summary>
/// Loads one page of followers for <paramref name="username"/> and extracts it into a
/// follow-page model.
/// </summary>
/// <param name="username">Account whose followers are requested; also stored on the result.</param>
/// <param name="cursor">Paging cursor passed to the address locator; defaults to "-1".</param>
/// <returns>The parsed follower page, or null when loading or parsing fails.</returns>
public async Task<TwitterFollowPageModel> GetFollowerAsync(string username, string cursor = "-1")
{
    var followerUrl = AddressLocator.Follower(username, cursor);
    var page = await HtmlLoader.TryLoadAndParsePageAsync(followerUrl, GetRandomProxy(), false, false);
    if (page == null)
    {
        return null;
    }

    if (!HtmlExtracter.TryParseFollower(page, out var followers))
    {
        return null;
    }

    // Record whose follower list this page belongs to.
    followers.BelongUserName = username;
    return followers;
}
/// <summary>
/// Builds a render tree from the "bind style" fixture and checks that the first layout
/// under the body level has a CSS rule attached.
/// </summary>
public void BuildRenderBindStyleTest()
{
    // Arrange
    var tree = new RenderTree();
    var htmlLoader = new HtmlLoader();
    var window = htmlLoader.LoadFromFile("Contents/test_html_bind_style.html");

    // Act
    tree.Build(window);

    // Walk down: root -> first child -> last child -> first child (the button).
    var htmlLayout = tree.LayoutRoot.Layouts.FirstOrDefault();
    var bodyLayout = htmlLayout.Layouts.LastOrDefault();
    var buttonLayout = bodyLayout.Layouts.FirstOrDefault();

    // Assert
    buttonLayout.CssRule.Should().NotBeNull();
}
/// <summary>
/// Loads one catalog page of the current section, logs how many ".h5" elements it
/// contains, and adds the absolute URL built from each element's href to the bag.
/// </summary>
/// <param name="pageNumber">Catalog page number appended to the section URL.</param>
/// <param name="stringBag">Shared bag that receives the collected URLs.</param>
private async Task ProcessPage(int pageNumber, ConcurrentBag<string> stringBag)
{
    var catalogPage = await HtmlLoader.LoadAsync($"{baseUrl}/catalog/{SectionName}/{pageNumber}");
    var recipeNodes = catalogPage.QuerySelectorAll(".h5");

    Console.WriteLine($"{recipeNodes.Length} recipes found at page {pageNumber}.");

    foreach (var recipeNode in recipeNodes)
    {
        // The href is appended directly to the base URL.
        stringBag.Add($"{baseUrl}{recipeNode.GetAttribute("href")}");
    }
}
/// <summary>
/// Console demo: wires up the article comparison pipeline and prints the similarity score
/// for five fixed pairs of article URLs, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
{
    // Pipeline wiring.
    var htmlCleaner = new HtmlCleaner(new ISiteHtmlCleaner[] { new DailyHtmlCleaner(), new MirrorHtmlCleaner() });
    var htmlLoader = new HtmlLoader();
    var articleProvider = new ArticleProvider(htmlCleaner, htmlLoader);
    var cosineSimilarityCalculator = new CosineSimilarityCalculator();
    IDocumentFrequencyProvider dfProvider = LoadFrequencies();
    var tfIdfCalculator = new TfIdfCalculator(dfProvider);
    var tokenizer = new Tokenizer();
    var articleProcessor = new ArticleProcessor(tfIdfCalculator, tokenizer);
    var articleComparer = new TextProcessing.ArticleComparer(articleProvider, cosineSimilarityCalculator, articleProcessor);

    // Compares one pair of articles and prints the resulting score on its own line.
    Action<string, string> printSimilarity = (firstUrl, secondUrl) =>
    {
        double similarity = articleComparer.Compare(firstUrl, secondUrl);
        Console.WriteLine(similarity);
    };

    Console.WriteLine("Similar articles:");
    printSimilarity(
        @"http://www.dailymail.co.uk/news/article-2489957/Britains-spy-chiefs-grilled-MPs-television-time.html",
        @"http://www.mirror.co.uk/news/uk-news/mi6-mi5-gchq-bosses-questioned-2685310");
    printSimilarity(
        @"http://www.dailymail.co.uk/news/article-2489640/80-parents-caught-children-copying-p**n-style-dances-offensive-lyrics.html",
        @"http://www.mirror.co.uk/news/uk-news/miley-cyrus-twerking-kids-copying-2685363");

    Console.WriteLine("Same article:");
    printSimilarity(
        @"http://www.dailymail.co.uk/news/article-2490296/You-STILL-likely-lose-job-recession-25s-shop-workers-risk.html",
        @"http://www.dailymail.co.uk/news/article-2490296/You-STILL-likely-lose-job-recession-25s-shop-workers-risk.html");

    Console.WriteLine("Different articles:");
    printSimilarity(
        @"http://www.dailymail.co.uk/femail/article-2489984/Needy-people-likely-cheat.html",
        @"http://www.dailymail.co.uk/news/article-2490531/Worlds-oldest-paperboy-deliver-round-71-years-route.html");
    printSimilarity(
        @"http://www.dailymail.co.uk/news/article-2490412/Wikileaks-journalist-spent-4-months-Edward-Snowden-leaves-Russia.html",
        @"http://www.dailymail.co.uk/news/article-2489994/Twitter-share-prices-soar-firms-day-trading.html");

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Stores the supplied dependencies and creates the handler's own SHA-256 algorithm
/// instance and HTML loader.
/// </summary>
/// <param name="appConfig">Application configuration kept by the handler.</param>
/// <param name="filter">Request filter kept by the handler.</param>
/// <param name="logger">Logger kept by the handler.</param>
/// <param name="settings">Browser settings kept by the handler.</param>
/// <param name="windowSettings">Window settings kept by the handler.</param>
/// <param name="text">Text provider; kept and also passed to the HTML loader.</param>
internal ResourceHandler(
    AppConfig appConfig,
    IRequestFilter filter,
    ILogger logger,
    BrowserSettings settings,
    WindowSettings windowSettings,
    IText text)
{
    // Dependencies handed in by the caller.
    this.appConfig = appConfig;
    this.filter = filter;
    this.logger = logger;
    this.settings = settings;
    this.windowSettings = windowSettings;
    this.text = text;

    // Helpers owned by this instance.
    this.algorithm = new SHA256Managed();
    this.htmlLoader = new HtmlLoader(text);
}
/// <summary>
/// Loads page 2 through a loader configured with <c>ShopSettings(1, 1)</c>, parses it with
/// <c>ShopParser</c> and checks that the parser's price list is populated (non-null).
/// </summary>
public async Task CanParse()
{
    // Arrange
    IParserSettings settings = new ShopSettings(1, 1);
    var htmlLoader = new HtmlLoader(settings);
    var shopParser = new ShopParser();
    var htmlParser = new HtmlParser();

    var pageSource = await htmlLoader.GetSourceByPageId(2);
    var pageDocument = await htmlParser.ParseAsync(pageSource);

    // Act
    // The return value is not inspected; the assertion looks at the parser's own state.
    var parseResult = shopParser.Parse(pageDocument);

    // Assert
    Assert.IsNotNull(shopParser.PricesList);
}
/// <summary>
/// Console tool: reads article URLs from the URL list file, tokenizes each article's text,
/// feeds the tokens to a document-frequency provider and saves the collected frequencies.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
{
    var htmlCleaner = new HtmlCleaner(new[] { new TelegraphHtmlCleaner() });
    var htmlLoader = new HtmlLoader();
    var articleProvider = new ArticleProvider(htmlCleaner, htmlLoader);
    var frequencyProvider = new DocumentFrequencyProvider();
    var tokenizer = new Tokenizer();

    // One URL per line; the list file is read as Unicode (UTF-16).
    foreach (string url in File.ReadAllLines(ArticlesUrlsFile, Encoding.Unicode))
    {
        IArticle fetchedArticle = articleProvider.Get(url);
        IEnumerable<IToken> articleTokens = tokenizer.Tokenize(fetchedArticle.Text);
        frequencyProvider.ProcessText(articleTokens);
    }

    SaveFrequencies(frequencyProvider);
}
/// <summary>
/// Loads the index file into <c>Window</c>, creates the renderer for it, picks a sound
/// player for the current operating system and runs the window.
/// </summary>
/// <param name="pathToFileIndex">Path of the HTML index file to load.</param>
public void Startup(string pathToFileIndex)
{
    Window = new HtmlLoader().LoadFromFile(pathToFileIndex);
    _renderer = new HtmlRenderer(Window);

    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        SoundPlayer = new WindowsSoundPlayer();
    }
    else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)
             || RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
    {
        // No sound player is assigned on Linux/macOS yet (placeholder: AFPlayer).
    }

    RunWindow();
}
/// <summary>
/// Determines how many catalog pages the current section has (capped at
/// <c>MaxPageAmount</c>) and collects the URLs found on each page via <c>ProcessPage</c>.
/// </summary>
/// <returns>All collected URLs; their order is whatever the concurrent bag yields.</returns>
public override async Task<IEnumerable<string>> GetPages()
{
    var sectionPage = await HtmlLoader.LoadAsync($"{baseUrl}/catalog/{SectionName}");

    // Without a pager element the section is treated as having a single page.
    var lastPagerLink = sectionPage.QuerySelector(".search-pages [href]:last-child");
    int totalPages = lastPagerLink != null
        ? PageCountParser(lastPagerLink.Text())
        : 1;

    var collectedUrls = new ConcurrentBag<string>();

    // Pages are processed concurrently, limited to the number of processors.
    await Enumerable
        .Range(1, Math.Min(totalPages, MaxPageAmount))
        .ParallelForEachAsync(
            pageNumber => ProcessPage(pageNumber, collectedUrls),
            Environment.ProcessorCount);

    return collectedUrls.ToList();
}
/// <summary>
/// Click handler for the search button: loads and parses the songs matching the text
/// box's content, builds the name/duration labels and download buttons for them, and
/// shows the current page. Shows the error message instead when an
/// <see cref="ArgumentNullException"/> is thrown while loading or parsing.
/// </summary>
private async void SearchButton_Click(object sender, RoutedEventArgs e)
{
    const string uiFontName = "Microsoft YaHei UI Light";

    ClearSongBox();
    SwitchingButtons.Visibility = Visibility.Visible;

    string searchText = textBox.Text;
    loader = new HtmlLoader(new ParserSettings());
    parser = new Parser();

    try
    {
        songs = parser.Parse(await loader.LoadDocumentAsync(searchText)).ToList();
    }
    catch (ArgumentNullException)
    {
        errorMessage.Visibility = Visibility.Visible;
        return;
    }

    // One label per song for its name ...
    songNameLabels = songs
        .Select(song => new Label { Content = song.Name, FontFamily = new FontFamily(uiFontName) })
        .ToList();

    // ... one for its duration ...
    songDurationLabels = songs
        .Select(song => new Label { Content = song.Duration, FontFamily = new FontFamily(uiFontName) })
        .ToList();

    // ... and a download button whose Tag carries "<link>_%_<name>.mp3".
    songDownloadButtons = songs
        .Select(song => new Button
        {
            Content = "Скачать",
            Tag = song.DownloadLink + "_%_" + song.Name + ".mp3",
            FontFamily = new FontFamily(uiFontName)
        })
        .ToList();

    CheckForDownloadedSongs();
    showSongsOnWindow(currentSongsPage);
}
/// <summary>
/// Builds a signed ItemLookup request for the current ISBN, downloads the response, parses
/// it as XML and creates an <c>Item</c> from every element whose local name is "Item".
/// </summary>
/// <param name="keyHelper">Supplies the AWS access key id and secret key used for signing.</param>
public static void Test(AwsKeyHelper keyHelper)
{
    var signer = new SignedRequestHelper(keyHelper.GetAwsAccessKeyId(), keyHelper.GetAwsSecretKey(), DESTINATION);

    // TODO: AHT - Add AssociateId
    // Add Request week for caching key?

    // ItemLookup request expressed as a dictionary of query parameters.
    IDictionary<string, string> lookupRequest = new Dictionary<string, String>
    {
        { "Service", "AWSECommerceService" },
        { "Version", "2009-03-31" },
        { "Operation", "ItemLookup" },
        { "ItemId", isbn },
        { "IdType", "ISBN" },
        { "SearchIndex", "Books" },
        { "ResponseGroup", "Tags,Reviews,EditorialReview" },
        { "TagsPerPage", "20" }
    };

    String signedUrl = signer.Sign(lookupRequest);

    var responseBody = new HtmlLoader().LoadUrl(signedUrl);
    var responseXml = XDocument.Parse(responseBody);

    var items = responseXml.Descendants()
        .Where(node => node.Name.LocalName == "Item")
        .Select(item => Item.Create(item));

    // Counting forces the lazy query to run, so Item.Create is invoked for every match.
    items.Count();
}
/// <summary>
/// Builds a render tree from the "inherit styles" fixture and checks that every child of
/// the base div carries a CSS rule equivalent to the base div's own rule.
/// </summary>
public void BuildRender_InheritStyles_RenderLayout()
{
    // Arrange
    var tree = new RenderTree();
    var htmlLoader = new HtmlLoader();
    var window = htmlLoader.LoadFromFile("Contents/test_html_layout_ingerit_styles.html");

    // Act
    tree.Build(window);

    // Walk down: root -> first child -> last child -> first child (the base div).
    var htmlLayout = tree.LayoutRoot.Layouts.FirstOrDefault();
    var bodyLayout = htmlLayout.Layouts.LastOrDefault();
    var baseDiv = bodyLayout.Layouts.FirstOrDefault();

    // Assert
    baseDiv.CssRule.Should().NotBeNull();

    foreach (var child in baseDiv.Layouts)
    {
        child.CssRule.Should().BeEquivalentTo(baseDiv.CssRule);
    }
}
/// <summary>
/// Loads the HTML at <paramref name="url"/> into an HTML document, copies that document's
/// base URL onto <paramref name="document"/> and loads the content into it.
/// </summary>
/// <param name="url">Location of the HTML to load.</param>
/// <param name="document">Target document that receives the base URL and loaded content.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="url"/> or <paramref name="document"/> is null.
/// </exception>
public static void LoadHtmlFile(
    string url,
    DomDocument document)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    if (document == null)
    {
        throw new ArgumentNullException("document");
    }

    HTMLDocument parsedHtml = new HTMLDocument();
    parsedHtml.LoadUrl(url);

    // The base URL comes from the loaded HTML document, not from the url argument directly.
    document.BaseUrl = parsedHtml.BaseURL;

    HtmlLoader htmlLoader = new HtmlLoader();
    htmlLoader.Load(parsedHtml, document);

    document.AfterLoad(FileFormat.Html);
}
/// <summary>
/// Creates the browser control for <paramref name="url"/> and stores the handlers, loader
/// and logger it is given.
/// </summary>
/// <param name="contextMenuHandler">Context menu handler kept by the control.</param>
/// <param name="dialogHandler">Dialog handler kept by the control.</param>
/// <param name="displayHandler">Display handler kept by the control.</param>
/// <param name="downloadHandler">Download handler kept by the control.</param>
/// <param name="htmlLoader">HTML loader kept by the control.</param>
/// <param name="keyboardHandler">Keyboard handler kept by the control.</param>
/// <param name="lifeSpanHandler">Life span handler kept by the control.</param>
/// <param name="logger">Logger kept by the control.</param>
/// <param name="requestHandler">Request handler kept by the control.</param>
/// <param name="url">Address passed on to the base class constructor.</param>
public BrowserControl(
    IContextMenuHandler contextMenuHandler,
    IDialogHandler dialogHandler,
    IDisplayHandler displayHandler,
    IDownloadHandler downloadHandler,
    HtmlLoader htmlLoader,
    IKeyboardHandler keyboardHandler,
    ILifeSpanHandler lifeSpanHandler,
    ILogger logger,
    IRequestHandler requestHandler,
    string url)
    : base(url)
{
    // Event/interaction handlers.
    this.contextMenuHandler = contextMenuHandler;
    this.dialogHandler = dialogHandler;
    this.displayHandler = displayHandler;
    this.downloadHandler = downloadHandler;
    this.keyboardHandler = keyboardHandler;
    this.lifeSpanHandler = lifeSpanHandler;
    this.requestHandler = requestHandler;

    // Supporting services.
    this.htmlLoader = htmlLoader;
    this.logger = logger;
}
/// <summary>
/// Loads HTML from <paramref name="stream"/> into an HTML document, assigns
/// <paramref name="baseUrl"/> to it and loads the content into <paramref name="document"/>.
/// </summary>
/// <param name="stream">Stream containing the HTML.</param>
/// <param name="document">Target document that receives the loaded content.</param>
/// <param name="baseUrl">Base URL assigned to the HTML document as-is, even when null or empty.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> or <paramref name="document"/> is null.
/// </exception>
public static void LoadHtmlFile(
    System.IO.Stream stream,
    DomDocument document,
    string baseUrl)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    if (document == null)
    {
        throw new ArgumentNullException("document");
    }

    HTMLDocument parsedHtml = new HTMLDocument();
    parsedHtml.Load(stream);

    // NOTE(review): assigned unconditionally, so a null or empty baseUrl overwrites
    // whatever Load left in BaseURL. Confirm that this is intended.
    parsedHtml.BaseURL = baseUrl;

    HtmlLoader htmlLoader = new HtmlLoader();
    htmlLoader.Load(parsedHtml, document);

    document.AfterLoad(FileFormat.Html);
}