Example #1
0
        /// <summary>
        /// Builds a render tree from the "inherit styles with parent" fixture and checks the CSS
        /// rules bound to the nested layouts: the first child of the main div must match
        /// ".oneLevel", the last child and its first child must match ".twoLevel", and the
        /// innermost element must share the main div's color and have the expected background.
        /// </summary>
        public void BuildRender_InheritStylesWithParent_RenderLayout()
        {
            var tree   = new RenderTree();
            var window = new HtmlLoader().LoadFromFile("Contents/test_html_inherit_styles_with_parent.html");
            tree.Build(window);

            // Walk root -> first child (html) -> last child (body) -> first child (main div).
            var mainDiv = tree.LayoutRoot
                              .Layouts.FirstOrDefault()
                              .Layouts.LastOrDefault()
                              .Layouts.FirstOrDefault();

            mainDiv.CssRule.Should().NotBeNull();

            var firstChild = mainDiv.Layouts.FirstOrDefault();
            firstChild.CssRule.SelectorText.Should().BeEquivalentTo(".oneLevel");

            var lastChild = mainDiv.Layouts.LastOrDefault();
            lastChild.CssRule.SelectorText.Should().BeEquivalentTo(".twoLevel");

            var innerElement = lastChild.Layouts.FirstOrDefault();
            innerElement.CssRule.SelectorText.Should().BeEquivalentTo(".twoLevel");

            var innerStyle = innerElement.CssRule.Style;
            var outerStyle = mainDiv.CssRule.Style;

            innerStyle.Color.Should().BeEquivalentTo(outerStyle.Color);
            innerStyle.Background.Color.Should().BeEquivalentTo(new CSSColor(17, 17, 17));
        }
Example #2
0
        /// <summary>
        /// Processes every page id from <c>parserSettings.StartPoint</c> through
        /// <c>parserSettings.EndPoint</c>: fetches the page source, parses it into a document,
        /// hands it to <c>Parser</c> and raises <c>OnParseOneLink</c> under the lock.
        /// Stops silently as soon as <c>Program.ParserActive</c> is false; raises
        /// <c>ParseComplete</c> only when the loop finished while the parser was still active.
        /// </summary>
        // NOTE(review): this is an async void method, so exceptions thrown after the first await
        // cannot be observed by the caller.
        private async void Worker()
        {
            loader = new HtmlLoader(parserSettings);

            int pageId = parserSettings.StartPoint;
            while (pageId <= parserSettings.EndPoint)
            {
                if (!Program.ParserActive)
                {
                    return;
                }

                var pageSource = await loader.GetSourceByPageId(pageId);
                var htmlParser = new HtmlParser();
                var parsedPage = await htmlParser.ParseDocumentAsync(pageSource);

                Parser.Parse(parsedPage);

                lock (locker)
                {
                    OnParseOneLink?.Invoke();
                }

                pageId++;
            }

            if (Program.ParserActive)
            {
                ParseComplete?.Invoke();
            }
        }
 /// <summary>
 /// Creates the service around the supplied parser and starts with an empty URL set,
 /// a fresh <c>HtmlLoader</c> and an empty measurements dictionary.
 /// </summary>
 /// <param name="parser">Parser stored in <c>Parser</c>.</param>
 public SitemapService(IParser<IEnumerable<string>> parser)
 {
     this.Parser = parser;

     this.Urls = new HashSet<string>();
     this.loader = new HtmlLoader();
     this.Measurements = new Dictionary<string, List<double>>();
 }
Example #4
0
            /// <summary>
            /// Fetches the source for page id 1 of the citydog.by "people" category, extracts the
            /// article heading texts and the image <c>src</c> attributes, pairs them by position
            /// and prints each pair to the console.
            /// </summary>
            /// <remarks>
            /// Pairs are printed straight from the zipped sequence. Building a dictionary keyed by
            /// heading text (as before) threw <c>ArgumentException</c> whenever two articles had
            /// the same heading, which aborted the whole run without any output.
            /// </remarks>
            // NOTE(review): this is an async void method, so exceptions thrown after the first
            // await cannot be observed by the caller; the signature is kept for compatibility.
            public async void Work()
            {
                const string url = "https://citydog.by/allposts/category/people/";

                var loader = new HtmlLoader(url);
                var source = await loader.GetSourceByPageId(1);

                var parser   = new HtmlParser();
                var document = parser.Parse(source);

                var itemsText = document.QuerySelectorAll(
                    "div > div.headingArticle > h2 > a");
                var itemsImg = document.QuerySelectorAll(
                    "div > div.imageWrapper > a > img");

                // Materialize both columns first so the document is queried exactly once each.
                var listText = itemsText.Select(item => item.TextContent).ToList();
                var listImg  = itemsImg.Select(item => item.GetAttribute("src")).ToList();

                // Zip stops at the shorter list, so a heading without an image (or vice versa) is skipped.
                var pairs = listText.Zip(listImg, (title, image) => new { title, image });

                foreach (var pair in pairs)
                {
                    Console.WriteLine($"Заголовок: {pair.title}");
                    Console.WriteLine($"Ссылка на изображение: {pair.image}");
                    Console.WriteLine($"-----------------------------------------------------------");
                }
            }
Example #5
0
        /// <summary>
        /// Reads HTML text from <paramref name="reader"/>, parses it into an <c>HTMLDocument</c>,
        /// loads that into <paramref name="document"/> through an <c>HtmlLoader</c> and then
        /// calls <c>document.AfterLoad(FileFormat.Html)</c>.
        /// </summary>
        /// <param name="reader">Source of the HTML text; it is read to the end but not disposed here.</param>
        /// <param name="document">Target document that receives the loaded content.</param>
        /// <param name="baseUrl">Base URL for the parsed HTML; applied only when it is neither null nor empty.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="reader"/> or <paramref name="document"/> is null.
        /// </exception>
        public static void LoadHtmlFile(
            System.IO.TextReader reader,
            DomDocument document,
            string baseUrl)
        {
            // nameof keeps the reported parameter names correct if the parameters are ever renamed.
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }
            if (document == null)
            {
                throw new ArgumentNullException(nameof(document));
            }

            HTMLDocument htmlDoc = new HTMLDocument();
            string       html    = reader.ReadToEnd();

            htmlDoc.LoadHTML(html);
            if (!string.IsNullOrEmpty(baseUrl))
            {
                htmlDoc.BaseURL = baseUrl;
            }

            HtmlLoader loader = new HtmlLoader();

            loader.Load(htmlDoc, document);
            document.AfterLoad(FileFormat.Html);
        }
Example #6
0
        /// <summary>
        /// Parses the local Pluralsight catalog test page and compares the parsed authors,
        /// courses and categories with the expected dictionaries of this test class.
        /// </summary>
        public void Should_ParseCatalogContent()
        {
            // Arrange
            const string host        = "http://www.pluralsight.com/";
            const string catalogPath = "./../../TestData/PluralsighCatalogTestData.html";

            var selector     = new PluralsightNodeSelector();
            var courseParser = new PluralsightNodeParser(host, selector);
            var sut          = new PluralsightCatalogParser(selector, courseParser);

            // Point every expected course back at the expected category that lists it.
            foreach (var category in ExpectedCategoriesDictionary.Values)
            {
                foreach (var course in category.Courses)
                {
                    course.Category = category;
                }
            }

            var htmlLoader = new HtmlLoader<HtmlAgilityPackHtmlContainer>();
            var catalog    = htmlLoader.Load(catalogPath, LocationType.Local);

            // Act
            var parsed = sut.Parse(catalog);

            // Assert: authors — both sides ordered by UrlName; expected authors without a UrlName are left out.
            var actualAuthors = parsed.AuthorsParseResult.AuthorsExceptWhoseUrlNullContainer.Values
                                      .OrderBy(author => author.UrlName);

            var wantedAuthors = ExpectedAuthorsDictionary.Values
                                                         .Where(author => author.UrlName != null)
                                                         .OrderBy(author => author.UrlName);

            Assert.Equal(wantedAuthors, actualAuthors, PluralsightAuthor.PropertiesComparer);

            // Assert: courses
            var actualCourses = parsed.CoursesParseResult.CourseContainer.Values;
            var wantedCourses = ExpectedCoursesDictionary.Values;

            CheckCoursesEquality(wantedCourses, actualCourses);

            // Assert: categories — same count, and each parsed category matches the expected one with the same UrlName.
            var actualCategories = parsed.CategoriesParseResult.CategoryContainer.Values;
            var wantedCategories = ExpectedCategoriesDictionary.Values;

            Assert.Equal(wantedCategories.Count, actualCategories.Count);
            foreach (var actualCategory in actualCategories)
            {
                var wantedCategory = wantedCategories.Single(x => x.UrlName == actualCategory.UrlName);

                Assert.Equal(wantedCategory, actualCategory, PluralsightCategory.PropertiesComparer);

                CheckCoursesEquality(wantedCategory.Courses, actualCategory.Courses, actualCategory);
            }
        }
Example #7
0
 /// <summary>
 /// Creates the loader, parser and word detector and subscribes <c>Handler</c> to the
 /// detector's <c>Finished</c> event.
 /// </summary>
 // Original author's reminder: do not forget to initialize the members here.
 public Presenter()
 {
     loader = new HtmlLoader();
     parser = new Parser();

     detector = new WordDetector();
     detector.Finished += Handler;
 }
Example #8
0
        /// <summary>
        /// Builds an <c>HtmlLoader</c> backed by a <c>TemplateProvider</c> that in turn uses a new
        /// <c>DatabaseProvider</c>.
        /// </summary>
        /// <returns>The newly created loader.</returns>
        public static HtmlLoader BuildHtmlLoader()
        {
            var templates = new TemplateProvider(new DatabaseProvider());

            return new HtmlLoader(templates);
        }
Example #9
0
        /// <summary>
        /// Calls <c>HtmlLoader.TryLoadAgentSource(true)</c>; when that reports failure, calls it
        /// again with <c>false</c>. The result of the second attempt is ignored.
        /// </summary>
        // NOTE(review): presumably the flag selects an offline (true) vs. online (false) source —
        // confirm against HtmlLoader.TryLoadAgentSource.
        public async Task InitAsync()
        {
            if (await HtmlLoader.TryLoadAgentSource(true))
            {
                return;
            }

            await HtmlLoader.TryLoadAgentSource(false);
        }
Example #10
0
 /// <summary>
 /// Stores the supplied collaborators and creates the SHA-256 algorithm instance and the
 /// <c>HtmlLoader</c> (built from <paramref name="text"/>) used by this handler.
 /// </summary>
 internal ResourceHandler(BrowserSettings settings, IRequestFilter filter, ILogger logger, IText text)
 {
     // Helpers created here.
     algorithm  = new SHA256Managed();
     htmlLoader = new HtmlLoader(text);

     // Collaborators passed in by the caller.
     this.filter   = filter;
     this.logger   = logger;
     this.text     = text;
     this.settings = settings;
 }
Example #11
0
        /// <summary>
        /// Checks that the factory built by <c>CreateInstance</c> returns exactly the loader
        /// instance produced by the delegate it was given.
        /// </summary>
        public void CreatesHtmlLoader()
        {
            // Arrange
            var wrapperFactory = Mock.Of<IHtmlWrapperFactory>();
            var webClient      = Mock.Of<IWebClientWrapper>();
            var expected       = new HtmlLoader(wrapperFactory, webClient);
            var factory        = CreateInstance(() => expected);

            // Act
            var actual = factory.Create();

            // Assert
            actual.ShouldBeSameAs(expected);
        }
Example #12
0
        /// <summary>
        /// Builds a render tree from the absolute-position compositor fixture and walks down to
        /// the body layout.
        /// </summary>
        // NOTE(review): the test contains no assertions yet; it only fails if loading, building
        // or the layout walk throws.
        public void RenderLayoutCompositor_AbsolutePosition_ReturnOffsetPosition()
        {
            var tree   = new RenderTree();
            var window = new HtmlLoader().LoadFromFile("Contents/test_compositor_absolute_position.html");

            tree.Build(window);

            var htmlLayout = tree.LayoutRoot.Layouts.FirstOrDefault();
            var bodyLayout = htmlLayout.Layouts.LastOrDefault();
        }
Example #13
0
        /// <summary>
        /// Loads the Twitter profile page of <paramref name="username"/> through a random proxy
        /// and extracts the user model from it.
        /// </summary>
        /// <param name="username">User name inserted into the profile URL as-is.</param>
        /// <returns>The parsed user, or null when the page could not be loaded or parsed.</returns>
        public async Task<TwitterUserModel> GetUserProfileAsync(string username)
        {
            var profileUrl = $"https://twitter.com/{username}?lang=en";
            var page       = await HtmlLoader.TryLoadAndParsePageAsync(profileUrl, GetRandomProxy());

            if (page == null)
            {
                return null;
            }

            if (!HtmlExtracter.TryParseUser(page, out var user))
            {
                return null;
            }

            return user;
        }
Example #14
0
        /// <summary>
        /// Fetches the source for <paramref name="i"/> and <paramref name="link"/> with a new
        /// <c>HtmlLoader</c> and parses it into an HTML document.
        /// </summary>
        /// <returns>The parsed document, or null when the loader returned the string "404".</returns>
        public static async Task<AngleSharp.Dom.Html.IHtmlDocument> GetDocument(int i, string link)
        {
            // The interpolation is kept on purpose: it turns a null link into an empty string.
            var html = await new HtmlLoader().GetSource(i, $"{link}");

            if (html == "404")
            {
                return null;
            }

            var htmlParser = new HtmlParser();

            return await htmlParser.ParseAsync(html);
        }
        /// <summary>
        /// Creates the handlers and loggers needed by the browser control, subscribes this
        /// instance's callbacks to their events, constructs the <c>BrowserControl</c> for
        /// <c>startUrl</c> and initializes it.
        /// </summary>
        private void InitializeControl()
        {
            // Create the handlers and the per-component loggers (each logger is cloned with the
            // component name and this instance's Id).
            var contextMenuHandler = new ContextMenuHandler();
            var controlLogger      = logger.CloneFor($"{nameof(BrowserControl)} #{Id}");
            var dialogHandler      = new DialogHandler();
            var displayHandler     = new DisplayHandler();
            var downloadLogger     = logger.CloneFor($"{nameof(DownloadHandler)} #{Id}");
            var downloadHandler    = new DownloadHandler(appConfig, settings, downloadLogger);
            var htmlLoader         = new HtmlLoader(text);
            var keyboardHandler    = new KeyboardHandler();
            var lifeSpanHandler    = new LifeSpanHandler();
            var requestFilter      = new RequestFilter();
            var requestLogger      = logger.CloneFor($"{nameof(RequestHandler)} #{Id}");
            var requestHandler     = new RequestHandler(appConfig, requestFilter, requestLogger, settings, text);

            Icon = new BrowserIconResource();

            // Route the handlers' events to the callbacks of this class.
            dialogHandler.DialogRequested  += DialogHandler_DialogRequested;
            displayHandler.FaviconChanged  += DisplayHandler_FaviconChanged;
            displayHandler.ProgressChanged += DisplayHandler_ProgressChanged;
            downloadHandler.ConfigurationDownloadRequested += DownloadHandler_ConfigurationDownloadRequested;
            downloadHandler.DownloadUpdated    += DownloadHandler_DownloadUpdated;
            keyboardHandler.ReloadRequested    += ReloadRequested;
            keyboardHandler.ZoomInRequested    += ZoomInRequested;
            keyboardHandler.ZoomOutRequested   += ZoomOutRequested;
            keyboardHandler.ZoomResetRequested += ZoomResetRequested;
            lifeSpanHandler.PopupRequested     += LifeSpanHandler_PopupRequested;
            requestHandler.QuitUrlVisited      += RequestHandler_QuitUrlVisited;
            requestHandler.RequestBlocked      += RequestHandler_RequestBlocked;

            // Set up the same filter instance that was handed to the request handler above.
            InitializeRequestFilter(requestFilter);

            control = new BrowserControl(
                contextMenuHandler,
                dialogHandler,
                displayHandler,
                downloadHandler,
                htmlLoader,
                keyboardHandler,
                lifeSpanHandler,
                controlLogger,
                requestHandler,
                startUrl);
            // Subscribe to the control's own events before it is initialized.
            control.AddressChanged      += Control_AddressChanged;
            control.LoadingStateChanged += Control_LoadingStateChanged;
            control.TitleChanged        += Control_TitleChanged;

            control.Initialize();
            logger.Debug("Initialized browser control.");
        }
Example #16
0
        /// <summary>
        /// Loads one follower page of <paramref name="username"/> through a random proxy and
        /// parses it; the returned page is tagged with the user name it belongs to.
        /// </summary>
        /// <param name="username">User whose followers are requested.</param>
        /// <param name="cursor">Cursor passed to <c>AddressLocator.Follower</c>; defaults to "-1".</param>
        /// <returns>The parsed follower page, or null when loading or parsing failed.</returns>
        public async Task<TwitterFollowPageModel> GetFollowerAsync(string username, string cursor = "-1")
        {
            var followerUrl = AddressLocator.Follower(username, cursor);
            var page        = await HtmlLoader.TryLoadAndParsePageAsync(followerUrl, GetRandomProxy(), false, false);

            if (page == null)
            {
                return null;
            }

            if (!HtmlExtracter.TryParseFollower(page, out var followerPage))
            {
                return null;
            }

            followerPage.BelongUserName = username;
            return followerPage;
        }
Example #17
0
        /// <summary>
        /// Builds a render tree from the "bind style" fixture and checks that the first layout
        /// inside the body has a CSS rule attached.
        /// </summary>
        public void BuildRenderBindStyleTest()
        {
            var tree   = new RenderTree();
            var window = new HtmlLoader().LoadFromFile("Contents/test_html_bind_style.html");

            tree.Build(window);

            // Walk root -> first child (html) -> last child (body) -> first child (button).
            var htmlLayout   = tree.LayoutRoot.Layouts.FirstOrDefault();
            var bodyLayout   = htmlLayout.Layouts.LastOrDefault();
            var buttonLayout = bodyLayout.Layouts.FirstOrDefault();

            buttonLayout.CssRule.Should().NotBeNull();
        }
Example #18
0
        /// <summary>
        /// Loads one catalog page of the current section, logs how many ".h5" elements it
        /// contains and adds the absolute URL built from each element's <c>href</c> to the bag.
        /// </summary>
        /// <param name="pageNumber">Page number appended to the catalog URL.</param>
        /// <param name="stringBag">Collection that receives the recipe URLs.</param>
        private async Task ProcessPage(int pageNumber, ConcurrentBag<string> stringBag)
        {
            var pageUrl = $"{baseUrl}/catalog/{SectionName}/{pageNumber}";
            var page    = await HtmlLoader.LoadAsync(pageUrl);
            var recipes = page.QuerySelectorAll(".h5");

            Console.WriteLine($"{recipes.Length} recipes found at page {pageNumber}.");

            foreach (var recipe in recipes)
            {
                var href = recipe.GetAttribute("href");

                stringBag.Add($"{baseUrl}{href}");
            }
        }
Example #19
0
        /// <summary>
        /// Demo entry point: wires up the article comparison pipeline, prints the similarity
        /// score for several pairs of article URLs (similar, identical and different articles)
        /// and then waits for a key press.
        /// </summary>
        private static void Main(string[] args)
        {
            var siteCleaners               = new ISiteHtmlCleaner[] { new DailyHtmlCleaner(), new MirrorHtmlCleaner() };
            var cleaner                    = new HtmlCleaner(siteCleaners);
            var loader                     = new HtmlLoader();
            var provider                   = new ArticleProvider(cleaner, loader);
            var cosine                     = new CosineSimilarityCalculator();
            IDocumentFrequencyProvider dfs = LoadFrequencies();
            var tfIdf                      = new TfIdfCalculator(dfs);
            var tokenizer                  = new Tokenizer();
            var processor                  = new ArticleProcessor(tfIdf, tokenizer);
            var comparer                   = new TextProcessing.ArticleComparer(provider, cosine, processor);

            // Compares one pair of URLs and prints the score. The result is held in a double
            // so the value is printed through the same Console.WriteLine overload every time.
            Action<string, string> printSimilarity = (firstUrl, secondUrl) =>
            {
                double similarity = comparer.Compare(firstUrl, secondUrl);
                Console.WriteLine(similarity);
            };

            Console.WriteLine("Similar articles:");
            printSimilarity(
                @"http://www.dailymail.co.uk/news/article-2489957/Britains-spy-chiefs-grilled-MPs-television-time.html",
                @"http://www.mirror.co.uk/news/uk-news/mi6-mi5-gchq-bosses-questioned-2685310");
            printSimilarity(
                @"http://www.dailymail.co.uk/news/article-2489640/80-parents-caught-children-copying-p**n-style-dances-offensive-lyrics.html",
                @"http://www.mirror.co.uk/news/uk-news/miley-cyrus-twerking-kids-copying-2685363");

            Console.WriteLine("Same article:");
            printSimilarity(
                @"http://www.dailymail.co.uk/news/article-2490296/You-STILL-likely-lose-job-recession-25s-shop-workers-risk.html",
                @"http://www.dailymail.co.uk/news/article-2490296/You-STILL-likely-lose-job-recession-25s-shop-workers-risk.html");

            Console.WriteLine("Different articles:");
            printSimilarity(
                @"http://www.dailymail.co.uk/femail/article-2489984/Needy-people-likely-cheat.html",
                @"http://www.dailymail.co.uk/news/article-2490531/Worlds-oldest-paperboy-deliver-round-71-years-route.html");
            printSimilarity(
                @"http://www.dailymail.co.uk/news/article-2490412/Wikileaks-journalist-spent-4-months-Edward-Snowden-leaves-Russia.html",
                @"http://www.dailymail.co.uk/news/article-2489994/Twitter-share-prices-soar-firms-day-trading.html");

            Console.ReadKey();
        }
Example #20
0
 /// <summary>
 /// Stores the supplied configuration, settings and collaborators and creates the SHA-256
 /// algorithm instance and the <c>HtmlLoader</c> (built from <paramref name="text"/>) used
 /// by this handler.
 /// </summary>
 internal ResourceHandler(
     AppConfig appConfig,
     IRequestFilter filter,
     ILogger logger,
     BrowserSettings settings,
     WindowSettings windowSettings,
     IText text)
 {
     // Configuration and settings.
     this.appConfig      = appConfig;
     this.settings       = settings;
     this.windowSettings = windowSettings;

     // Collaborators passed in by the caller.
     this.filter = filter;
     this.logger = logger;
     this.text   = text;

     // Helpers created here.
     algorithm  = new SHA256Managed();
     htmlLoader = new HtmlLoader(text);
 }
Example #21
0
        /// <summary>
        /// Loads the source for page id 2 with shop settings (1, 1), parses it and checks that
        /// the shop parser exposes a non-null price list afterwards.
        /// </summary>
        public async Task CanParse()
        {
            // Arrange
            IParserSettings settings = new ShopSettings(1, 1);

            var loader     = new HtmlLoader(settings);
            var shopParser = new ShopParser();
            var htmlParser = new HtmlParser();
            var pageSource = await loader.GetSourceByPageId(2);
            var document   = await htmlParser.ParseAsync(pageSource);

            // Act — the return value is not inspected; only the parser's state is asserted below.
            shopParser.Parse(document);

            // Assert
            Assert.IsNotNull(shopParser.PricesList);
        }
Example #22
0
        /// <summary>
        /// Entry point: reads article URLs from <c>ArticlesUrlsFile</c> (one per line, Unicode
        /// encoding), tokenizes the text of every article, feeds the tokens to the document
        /// frequency provider and finally saves the collected frequencies.
        /// </summary>
        private static void Main(string[] args)
        {
            var cleaner     = new HtmlCleaner(new[] { new TelegraphHtmlCleaner() });
            var loader      = new HtmlLoader();
            var articles    = new ArticleProvider(cleaner, loader);
            var frequencies = new DocumentFrequencyProvider();
            var tokenizer   = new Tokenizer();

            foreach (var url in File.ReadAllLines(ArticlesUrlsFile, Encoding.Unicode))
            {
                IArticle article = articles.Get(url);
                IEnumerable<IToken> articleTokens = tokenizer.Tokenize(article.Text);

                frequencies.ProcessText(articleTokens);
            }

            SaveFrequencies(frequencies);
        }
Example #23
0
        /// <summary>
        /// Loads the index file into <c>Window</c>, creates the renderer for it, picks a sound
        /// player for the current OS and runs the window.
        /// </summary>
        /// <param name="pathToFileIndex">Path of the HTML file passed to the loader.</param>
        public void Startup(string pathToFileIndex)
        {
            Window    = new HtmlLoader().LoadFromFile(pathToFileIndex);
            _renderer = new HtmlRenderer(Window);

            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                SoundPlayer = new WindowsSoundPlayer();
            }
            else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
                     RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
            {
                // Placeholder: no sound player is created on Linux/macOS yet (original note: AFPlayer).
            }

            RunWindow();
        }
Example #24
0
        /// <summary>
        /// Collects the recipe URLs of the current section: reads the page count from the pager
        /// of the section's first page (1 when no pager is found), then processes up to
        /// <c>MaxPageAmount</c> pages in parallel via <c>ProcessPage</c>.
        /// </summary>
        /// <returns>All URLs gathered by <c>ProcessPage</c>, in no particular order.</returns>
        public override async Task<IEnumerable<string>> GetPages()
        {
            var firstPage = await HtmlLoader.LoadAsync($"{baseUrl}/catalog/{SectionName}");
            var lastLink  = firstPage.QuerySelector(".search-pages [href]:last-child");

            int pageCount = 1;
            if (lastLink != null)
            {
                pageCount = PageCountParser(lastLink.Text());
            }

            var collectedUrls = new ConcurrentBag<string>();
            var pageNumbers   = Enumerable.Range(1, Math.Min(pageCount, MaxPageAmount));

            // Degree of parallelism is capped at the number of processors.
            await pageNumbers.ParallelForEachAsync(
                pageNumber => ProcessPage(pageNumber, collectedUrls),
                Environment.ProcessorCount);

            return collectedUrls.ToList();
        }
        /// <summary>
        /// Click handler of the search button: loads the songs matching the text box query and
        /// rebuilds the name labels, duration labels and download buttons shown on MainWindow.
        /// When loading or parsing throws <see cref="ArgumentNullException"/>, the error message
        /// is made visible and nothing else is updated.
        /// </summary>
        private async void SearchButton_Click(object sender, RoutedEventArgs e)
        {
            const string fontName = "Microsoft YaHei UI Light";

            ClearSongBox();
            SwitchingButtons.Visibility = Visibility.Visible;
            var query = textBox.Text;

            loader = new HtmlLoader(new ParserSettings());
            parser = new Parser();

            try
            {
                songs = parser.Parse(await loader.LoadDocumentAsync(query)).ToList();
            }
            catch (ArgumentNullException)
            {
                errorMessage.Visibility = Visibility.Visible;
                return;
            }

            // One label per song for the name column.
            songNameLabels = songs
                .Select(song => new Label { Content = song.Name, FontFamily = new FontFamily(fontName) })
                .ToList();

            // One label per song for the duration column.
            songDurationLabels = songs
                .Select(song => new Label { Content = song.Duration, FontFamily = new FontFamily(fontName) })
                .ToList();

            // One download button per song; Tag carries "<link>_%_<name>.mp3".
            songDownloadButtons = songs
                .Select(song => new Button
                {
                    Content    = "Скачать",
                    Tag        = song.DownloadLink + "_%_" + song.Name + ".mp3",
                    FontFamily = new FontFamily(fontName)
                })
                .ToList();

            CheckForDownloadedSongs();
            showSongsOnWindow(currentSongsPage);
        }
Example #26
0
        /// <summary>
        /// Builds a signed ItemLookup request for the current <c>isbn</c>, downloads the response
        /// and runs <c>Item.Create</c> over every element whose local name is "Item".
        /// </summary>
        /// <param name="keyHelper">Supplies the AWS access key id and secret key used for signing.</param>
        public static void Test(AwsKeyHelper keyHelper)
        {
            var signer = new SignedRequestHelper(keyHelper.GetAwsAccessKeyId(), keyHelper.GetAwsSecretKey(), DESTINATION);

            //TODO: AHT - Add AssociateId
            // Add Request week for caching key?

            // ItemLookup request expressed as a dictionary of query parameters.
            IDictionary<string, string> request = new Dictionary<string, String>
            {
                ["Service"]       = "AWSECommerceService",
                ["Version"]       = "2009-03-31",
                ["Operation"]     = "ItemLookup",
                ["ItemId"]        = isbn,
                ["IdType"]        = "ISBN",
                ["SearchIndex"]   = "Books",
                ["ResponseGroup"] = "Tags,Reviews,EditorialReview",
                ["TagsPerPage"]   = "20"
            };

            String signedUrl = signer.Sign(request);
            var responseXml  = new HtmlLoader().LoadUrl(signedUrl);

            var items = XDocument.Parse(responseXml)
                                 .Descendants()
                                 .Where(node => node.Name.LocalName == "Item")
                                 .Select(item => Item.Create(item));

            // The query is lazy; Count() enumerates it so Item.Create runs for every match.
            items.Count();
        }
Example #27
0
        /// <summary>
        /// Builds a render tree from the "inherit styles" fixture and checks that every child
        /// layout of the base div carries a CSS rule equivalent to the base div's rule.
        /// </summary>
        public void BuildRender_InheritStyles_RenderLayout()
        {
            var tree   = new RenderTree();
            var window = new HtmlLoader().LoadFromFile("Contents/test_html_layout_ingerit_styles.html");

            tree.Build(window);

            // Walk root -> first child (html) -> last child (body) -> first child (base div).
            var htmlLayout = tree.LayoutRoot.Layouts.FirstOrDefault();
            var bodyLayout = htmlLayout.Layouts.LastOrDefault();
            var baseDiv    = bodyLayout.Layouts.FirstOrDefault();

            baseDiv.CssRule.Should().NotBeNull();

            foreach (var child in baseDiv.Layouts)
            {
                child.CssRule.Should().BeEquivalentTo(baseDiv.CssRule);
            }
        }
Example #28
0
        /// <summary>
        /// Loads the HTML found at <paramref name="url"/> into an <c>HTMLDocument</c>, copies its
        /// base URL to <paramref name="document"/>, loads the content into the document through
        /// an <c>HtmlLoader</c> and then calls <c>document.AfterLoad(FileFormat.Html)</c>.
        /// </summary>
        /// <param name="url">Address handed to <c>HTMLDocument.LoadUrl</c>.</param>
        /// <param name="document">Target document that receives the loaded content.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="url"/> or <paramref name="document"/> is null.
        /// </exception>
        public static void LoadHtmlFile(
            string url,
            DomDocument document)
        {
            // nameof keeps the reported parameter names correct if the parameters are ever renamed.
            if (url == null)
            {
                throw new ArgumentNullException(nameof(url));
            }
            if (document == null)
            {
                throw new ArgumentNullException(nameof(document));
            }

            HTMLDocument htmlDoc = new HTMLDocument();

            htmlDoc.LoadUrl(url);
            // The base URL is taken from the HTML document after loading, not from the url argument.
            document.BaseUrl = htmlDoc.BaseURL;

            HtmlLoader loader = new HtmlLoader();

            loader.Load(htmlDoc, document);
            document.AfterLoad(FileFormat.Html);
        }
Example #29
0
 /// <summary>
 /// Creates the control for <paramref name="url"/> (forwarded to the base constructor) and
 /// stores the supplied handlers, HTML loader and logger for later use.
 /// </summary>
 public BrowserControl(
     IContextMenuHandler contextMenuHandler,
     IDialogHandler dialogHandler,
     IDisplayHandler displayHandler,
     IDownloadHandler downloadHandler,
     HtmlLoader htmlLoader,
     IKeyboardHandler keyboardHandler,
     ILifeSpanHandler lifeSpanHandler,
     ILogger logger,
     IRequestHandler requestHandler,
     string url) : base(url)
 {
     // Handlers.
     this.contextMenuHandler = contextMenuHandler;
     this.dialogHandler = dialogHandler;
     this.displayHandler = displayHandler;
     this.downloadHandler = downloadHandler;
     this.keyboardHandler = keyboardHandler;
     this.lifeSpanHandler = lifeSpanHandler;
     this.requestHandler = requestHandler;

     // Supporting services.
     this.htmlLoader = htmlLoader;
     this.logger = logger;
 }
Example #30
0
        /// <summary>
        /// Loads HTML from <paramref name="stream"/> into an <c>HTMLDocument</c>, assigns
        /// <paramref name="baseUrl"/> as its base URL, loads the content into
        /// <paramref name="document"/> through an <c>HtmlLoader</c> and then calls
        /// <c>document.AfterLoad(FileFormat.Html)</c>.
        /// </summary>
        /// <param name="stream">Stream handed to <c>HTMLDocument.Load</c>; it is not disposed here.</param>
        /// <param name="document">Target document that receives the loaded content.</param>
        /// <param name="baseUrl">Base URL assigned to the HTML document unconditionally (may be null).</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="stream"/> or <paramref name="document"/> is null.
        /// </exception>
        public static void LoadHtmlFile(
            System.IO.Stream stream,
            DomDocument document,
            string baseUrl)
        {
            // nameof keeps the reported parameter names correct if the parameters are ever renamed.
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }
            if (document == null)
            {
                throw new ArgumentNullException(nameof(document));
            }

            HTMLDocument htmlDoc = new HTMLDocument();

            htmlDoc.Load(stream);
            htmlDoc.BaseURL = baseUrl;

            HtmlLoader loader = new HtmlLoader();

            loader.Load(htmlDoc, document);
            document.AfterLoad(FileFormat.Html);
        }