/// <summary>
/// Verifies that <c>CreateTextNode(string)</c> returns a text node carrying the supplied text.
/// </summary>
public void CreateTextNodeWithText()
 {
     var doc = new HtmlDocument();
     var a = doc.CreateTextNode("something");
     Assert.AreEqual("something", a.InnerText);
     // Expected value goes first so that a failure message labels the two values correctly.
     Assert.AreEqual(HtmlNodeType.Text, a.NodeType);
 }
        /// <summary>
        /// Checks that the <c>Head</c> property of a freshly constructed document yields a <c>Head</c> instance.
        /// </summary>
        public void Head_ReturnsHeadObject()
        {
            HtmlDocument document = new HtmlDocument();

            Assert.IsInstanceOf<Head>(document.Head);
        }
Пример #3
0
 /// <summary>
 /// Loads the HTML file at <c>..\..\mshome.htm</c> and saves it as <c>mshome.xml</c> with XML output enabled.
 /// </summary>
 /// <param name="args">Command-line arguments; not used.</param>
 static void Main(string[] args)
 {
     const string inputPath = @"..\..\mshome.htm";
     const string outputPath = "mshome.xml";

     var document = new HtmlDocument();
     document.Load(inputPath);

     // The output mode is switched after loading, immediately before the save.
     document.OptionOutputAsXml = true;
     document.Save(outputPath);
 }
Пример #4
0
 /// <summary>
 /// Extracts link targets from an HTML string.
 /// </summary>
 /// <remarks>
 /// Anchors carrying a <c>ref</c> attribute take precedence; when the document has none,
 /// the <c>href</c> values of the anchors that have one are returned instead.
 /// </remarks>
 /// <param name="htmlSource">The HTML markup to scan.</param>
 /// <returns>
 /// The attribute values of the matching anchors, or <c>null</c> when the input is <c>null</c>,
 /// no matching anchor exists, or the markup cannot be processed.
 /// </returns>
 public static List<string> GetLinksFromWebsite(string htmlSource)
 {
     if (htmlSource == null)
     {
         return null;
     }

     try
     {
         var doc = new HtmlDocument();
         doc.LoadHtml(htmlSource);

         var root = doc.DocumentNode;
         if (root.InnerHtml == null)
         {
             return null;
         }

         // Each query runs once and its result is tested explicitly, instead of
         // re-running it and relying on a swallowed NullReferenceException.
         var attributeName = "ref";
         var anchors = root.SelectNodes("//a[@ref]");
         if (anchors == null)
         {
             attributeName = "href";
             anchors = root.SelectNodes("//a[@href]");
         }

         if (anchors == null)
         {
             return null;
         }

         return anchors
             .Select(node => node.Attributes[attributeName].Value)
             .ToList();
     }
     catch (Exception)
     {
         // Keeps the original contract: any failure yields null rather than an exception.
         return null;
     }
 }
        /// <summary>
        /// Launches <c>example2.htm</c> in a browser window, searches for a hyperlink whose inner text is
        /// "Hello" using the <c>VisibleOnly</c> search configuration, asserts that two controls match and that
        /// the bound control has a positive width and height, then clicks it.
        /// </summary>
        /// <remarks>
        /// The method name records the observed problem: the hidden link is matched instead of the visible one.
        /// </remarks>
        public void Mouse_click_will_fail_because_hidden_link_is_matched_instead_of_visible_link()
        {
            // Playback.PlaybackSettings.SmartMatchOptions = SmartMatchOptions.None; // tried Sudhish Mathuria's suggestion

            // The test page is read from the test deployment directory.
            var example2Path = Path.Combine(TestContext.TestDeploymentDir, "example2.htm");

            var window = BrowserWindow.Launch(example2Path);

            // Narrow the document search to the page whose title is this GUID.
            var document = new HtmlDocument(window);
            document.FilterProperties[HtmlDocument.PropertyNames.Title] = "2ad6de55-75f1-403f-8fcb-1d5defac9786";

            var visibleLink = new HtmlHyperlink(document);
            visibleLink.SearchProperties[HtmlHyperlink.PropertyNames.InnerText] = "Hello";

            // Restrict the search to visible controls only.
            visibleLink.SearchConfigurations.Add(SearchConfiguration.VisibleOnly);

            // The search is expected to report two matches even with VisibleOnly set.
            var allMatches = visibleLink.FindMatchingControls();
            Assert.AreEqual(2, allMatches.Count, "Should be two matching.");

            visibleLink.Find(); // The control bound here is expected to be the visible link.

            // A positive width and height are used as the check that the bound control is the visible one.
            Debug.WriteLine("BoundingRectangle: " + visibleLink.BoundingRectangle);
            Assert.IsTrue(visibleLink.BoundingRectangle.Width > 0, "Width should positive.");
            Assert.IsTrue(visibleLink.BoundingRectangle.Height > 0, "Height should positive.");

            Mouse.Click(visibleLink);
        }
Пример #6
0
 /// <summary>
 /// Verifies that <c>CreateElement</c> returns an element node with the requested name.
 /// </summary>
 public void CreateElement()
 {
     var doc = new HtmlDocument();
     var a = doc.CreateElement("a");
     Assert.AreEqual("a", a.Name);
     // Expected value goes first so that a failure message labels the two values correctly.
     Assert.AreEqual(HtmlNodeType.Element, a.NodeType);
 }
Пример #7
0
 /// <summary>
 /// Verifies that <c>CreateAttribute(name, value)</c> produces an attribute exposing both values.
 /// </summary>
 public void CreateAttributeWithText()
 {
     HtmlDocument document = new HtmlDocument();

     var attribute = document.CreateAttribute("href", "http://something.com");

     Assert.AreEqual("href", attribute.Name);
     Assert.AreEqual("http://something.com", attribute.Value);
 }
Пример #8
0
 /// <summary>
 /// Assigning <c>null</c> to <c>Head</c> on a document that has no head leaves the document empty
 /// and <c>Head</c> null.
 /// </summary>
 public void Head_SetNonExisting_Null_RemovesElement()
 {
     var document = new HtmlDocument { Head = null };

     document.Head = null;

     Assert.True(document.IsEmpty);
     Assert.Null(document.Head);
 }
 /// <summary>
 /// Collects the CSS referenced by a document into one string and removes the source tags.
 /// </summary>
 /// <remarks>
 /// <c>style</c> tags contribute their child elements joined by a space. <c>link</c> tags that declare
 /// <c>type="text/css"</c> or <c>rel="stylesheet"</c> are downloaded; relative links are resolved against
 /// the current HTTP request URL when one is available and skipped otherwise. Every <c>style</c> and
 /// <c>link</c> tag found is removed from the document afterwards, including links that were skipped.
 /// </remarks>
 /// <param name="doc">The document to read from and strip.</param>
 /// <returns>The concatenated CSS text.</returns>
 private static string ExtractCss(HtmlDocument doc)
 {
     var styleTags = doc.FindTagsByName("style", "link").ToArray();
     var css = new StringBuilder();
     foreach (var tag in styleTags)
     {
         if (tag.IsType("style"))
         {
             css.AppendLine(tag.ChildElements.ToSeparatedString(" "));
             continue;
         }

         if (!tag.IsType("link"))
         {
             continue;
         }

         // Both attributes are matched ordinally and case-insensitively; previously "rel" was
         // compared case-sensitively and both comparisons depended on the current culture.
         var isStylesheet =
             string.Equals(tag.Attributes["type"], "text/css", StringComparison.OrdinalIgnoreCase) ||
             string.Equals(tag.Attributes["rel"], "stylesheet", StringComparison.OrdinalIgnoreCase);
         if (!isStylesheet)
         {
             continue;
         }

         var src = tag.Attributes["href"];
         if (string.IsNullOrEmpty(src))
         {
             continue;
         }

         var uri = new Uri(src, UriKind.RelativeOrAbsolute);
         if (!uri.IsAbsoluteUri && HttpContext.Current != null && HttpContext.Current.Request != null)
         {
             uri = new Uri(HttpContext.Current.Request.Url, uri);
         }
         if (!uri.IsAbsoluteUri)
         {
             continue;
         }

         // WebClient is disposable; release it as soon as the download finishes.
         using (var client = new WebClient())
         {
             css.Append(client.DownloadString(uri));
         }
     }

     foreach (var tag in styleTags)
     {
         tag.Remove();
     }
     return css.ToString();
 }
Пример #10
0
 /// <summary>
 /// Verifies that the parameterless <c>CreateTextNode</c> returns a node with the text node name and type.
 /// </summary>
 public void CreateTextNode()
 {
     var doc = new HtmlDocument();
     var a = doc.CreateTextNode();
     Assert.AreEqual(HtmlNode.HtmlNodeTypeNameText, a.Name);
     // Expected value goes first so that a failure message labels the two values correctly.
     Assert.AreEqual(HtmlNodeType.Text, a.NodeType);
 }
Пример #11
0
	/// <summary>
	/// Loads the HTML file named by <c>args[0]</c> and saves it to <c>args[1]</c> with XML output enabled.
	/// </summary>
	/// <param name="args">Input HTML path followed by the output path.</param>
	static void Main (string [] args)
	{
		// Both paths are required; report usage instead of failing with an index error.
		if (args == null || args.Length < 2)
		{
			System.Console.Error.WriteLine("Usage: <input.html> <output.xml>");
			System.Environment.ExitCode = 1;
			return;
		}

		HtmlDocument doc = new HtmlDocument();
		doc.Load(args [0]);
		doc.OptionOutputAsXml = true;
		doc.Save(args [1]);
	}
Пример #12
0
 /// <summary>
 /// Verifies that <c>CreateComment(string)</c> returns a comment node carrying the supplied text.
 /// </summary>
 public void CreateCommentWithText()
 {
     var doc = new HtmlDocument();
     var a = doc.CreateComment("something");
     Assert.AreEqual(HtmlNode.HtmlNodeTypeNameComment, a.Name);
     Assert.AreEqual("something", a.InnerText);
     // Expected value goes first so that a failure message labels the two values correctly.
     Assert.AreEqual(HtmlNodeType.Comment, a.NodeType);
 }
 /// <summary>
 /// Reads the existing <c>Closed</c> member through a <c>dynamic</c> reference to the document node
 /// and checks that the value is a <c>bool</c>.
 /// </summary>
 public void TestCallingExistingMember()
 {
     const string markup = "<html><body class=\"asdfasd\"><p>asdf asdf sdf</p></body></html>";
     var document = new HtmlDocument();
     document.LoadHtml(markup);

     dynamic root = document.DocumentNode;
     var closed = root.Closed;

     Assert.IsInstanceOf<bool>(closed);
 }
 /// <summary>
 /// Calls the existing <c>Descendants()</c> method through a <c>dynamic</c> reference to the document node
 /// and checks that the result is a sequence of <c>HtmlNode</c>.
 /// </summary>
 public void TestCallingExistingFunction()
 {
     const string markup = "<html><body class=\"asdfasd\"><p>asdf asdf sdf</p></body></html>";
     var document = new HtmlDocument();
     document.LoadHtml(markup);

     dynamic root = document.DocumentNode;
     var descendants = root.Descendants();

     Assert.IsInstanceOf<IEnumerable<HtmlNode>>(descendants);
 }
Пример #15
0
        /// <summary>
        /// An attribute written without a value (<c>&lt;a href&gt;</c>) must render as an empty string.
        /// </summary>
        public void Test_Attribute_ToString_Returns_Empty_String_When_Value_Is_Not_Defined()
        {
            const string markup = "<html><head></head><body><a href>yo</a></body></html>";
            var document = new HtmlDocument(markup);

            var hrefText = document.DocumentNode.SelectSingleNode("//a").Attributes["href"].ToString();

            Assert.AreEqual(string.Empty, hrefText);
        }
        /// <summary>
        /// Checks that the <c>Body</c> property of a freshly constructed document yields a <c>Body</c> instance.
        /// </summary>
        public void Body_ReturnsBodyObject()
        {
            HtmlDocument document = new HtmlDocument();
            Body actual = document.Body;

            Assert.IsInstanceOf<Body>(actual);
        }
Пример #17
0
 /// <summary>
 /// Assigning a new head to a document that already has one leaves the new head as the
 /// document's only element and as the value of <c>Head</c>.
 /// </summary>
 public void Head_SetExisting_NonNull_RemovesElement()
 {
     HtmlElement replacement = Tag.Head.WithClass("class");
     var document = new HtmlDocument { Head = Tag.Head };

     document.Head = replacement;

     Assert.Equal(new HtmlElement[] { replacement }, document.Elements());
     Assert.Equal(replacement, document.Head);
 }
Пример #18
0
        /// <summary>
        /// Checks that <c>HasAttribute</c> reports an attribute that is present on the selected anchor.
        /// </summary>
        public void Test_HasAttribute()
        {
            const string markup = "<html><head></head><body><a href=\"http://flo.se\">yo</a></body></html>";
            var document = new HtmlDocument(markup);

            var anchor = document.DocumentNode.SelectSingleNode("//a");

            Assert.IsTrue(anchor.HasAttribute("href"));
        }
Пример #19
0
        /// <summary>
        /// Checks that an attribute's <c>ToString()</c> returns the attribute value.
        /// </summary>
        public void Test_Attribute_ToString_Returns_Value()
        {
            const string markup = "<html><head></head><body><a href=\"http://flo.se\">yo</a></body></html>";
            var document = new HtmlDocument(markup);

            var hrefText = document.DocumentNode.SelectSingleNode("//a").Attributes["href"].ToString();

            Assert.AreEqual("http://flo.se", hrefText);
        }
Пример #20
0
    /// <summary>
    /// Downloads every page of <c>http://market.karelia.pro/section/8/</c>, extracts the link, title and
    /// price of each listed item, and writes them to <c>result.xml</c>.
    /// </summary>
    /// <returns>A task that completes once the XML file has been saved.</returns>
    public static async Task MainAsync()
    {
        const string sectionUrl = "http://market.karelia.pro/section/8/";

        string[] pages;

        // One HttpClient serves all requests instead of a new instance per page.
        using (var client = new HttpClient())
        {
            var firstPage = new HtmlDocument();
            firstPage.LoadHtml(await client.GetStringAsync(sectionUrl));

            // The "page" query value of the ninth paginator link is used as the page count.
            var lastPageHref = firstPage.DocumentNode
                .SelectSingleNode(".//*[@id='paginator']/li[9]/a")
                .GetAttributeValue("href", "");
            var pageCount = int.Parse(
                HttpUtility.ParseQueryString(new Uri(lastPageHref).Query).Get("page"));

            var downloads = Enumerable.Range(1, pageCount)
                .Select(page => client.GetStringAsync($"{sectionUrl}?page={page}"))
                .ToList();

            pages = await Task.WhenAll(downloads);
        }

        var items = pages.SelectMany(pageHtml =>
        {
            var pageDoc = new HtmlDocument();
            pageDoc.LoadHtml(pageHtml);

            // SelectNodes can return null when nothing matches; treat that as an empty page.
            var rows = pageDoc.DocumentNode.SelectNodes(".//*[@id='alist']/li");

            return (rows ?? Enumerable.Empty<HtmlNode>())
                .Select(row => new
                {
                    // The leading "." keeps each query relative to the current row. A bare "//"
                    // searches the whole document and returns the same first match for every row.
                    link = row.SelectSingleNode(".//div[@class='name']/a")?.Attributes["href"]?.Value,
                    title = row.SelectSingleNode(".//div[@class='name']/a/span[@class='title']")?.InnerText,
                    price = row.SelectSingleNode(".//div[@class='price']/strong/span")?.InnerText
                });
        }).ToList();

        new XDocument(new XDeclaration("1.0", null, null),
            new XElement("root",
                items.Select(
                    n =>
                        new XElement("item", new XElement(nameof(n.link), n.link),
                            new XElement(nameof(n.price), n.price), new XElement(nameof(n.title), n.title))))).Save(
                                "result.xml");
    }
        /// <summary>
        /// Checks the markup that a freshly constructed document renders through <c>ToString()</c>.
        /// </summary>
        public void TestInitial()
        {
            const string expected = @"<!DOCTYPE html><html><head></head><body></body></html>";
            var document = new HtmlDocument();

            string actual = document.ToString();

            Assert.AreEqual(expected, actual);
        }
 /// <summary>
 /// Reads <c>Html.Body._Class</c> through a <c>dynamic</c> reference to the document node and
 /// checks that a non-null <c>HtmlAttribute</c> comes back.
 /// </summary>
 public void TestGetAttribute()
 {
     const string markup = "<html><body class=\"asdfasd\"><p>asdf asdf sdf</p></body></html>";
     var document = new HtmlDocument();
     document.LoadHtml(markup);

     dynamic root = document.DocumentNode;
     var classAttribute = root.Html.Body._Class;

     Assert.IsNotNull(classAttribute);
     Assert.IsInstanceOf<HtmlAttribute>(classAttribute);
 }
        /// <summary>
        /// Checks that a class added to the body appears in the markup rendered by <c>ToString()</c>.
        /// </summary>
        public void BodyChanged_GetString_ReturnsCorrectHtml()
        {
            const string expected = @"<!DOCTYPE html><html><head></head><body class=""one""></body></html>";
            var document = new HtmlDocument();

            document.Body.AddClass("one");
            string actual = document.ToString();

            Assert.AreEqual(expected, actual);
        }
 /// <summary>
 /// Reads <c>Html.Body</c> through a <c>dynamic</c> reference to the document node and checks
 /// that a non-null <c>HtmlNode</c> comes back.
 /// </summary>
 public void TestGetMember()
 {
     const string markup = "<html><body><p>asdf asdf sdf</p></body></html>";
     var document = new HtmlDocument();
     document.LoadHtml(markup);

     dynamic root = document.DocumentNode;
     var body = root.Html.Body;

     Assert.IsNotNull(body);
     Assert.IsInstanceOf<HtmlNode>(body);
 }
Пример #25
0
		/// <summary>
		/// Downloads a fixed page, loads it into an <c>HtmlDocument</c> and saves the document to a
		/// memory stream. The test has no assertions; it passes when no exception escapes.
		/// </summary>
		public void StackOverflow()
		{
			var url = "http://rewarding.me/active-tel-domains/index.php/index.php?rescan=amour.tel&w=A&url=&by=us&limits=0";
			var request = WebRequest.Create(url);
			var htmlDocument = new HtmlDocument();

			// The response and its stream hold a network connection; release them once loaded.
			using (var response = request.GetResponse())
			using (var responseStream = response.GetResponseStream())
			{
				htmlDocument.Load(responseStream);
			}

			using (Stream memoryStream = new MemoryStream())
			{
				htmlDocument.Save(memoryStream);
			}
		}
Пример #26
0
 /// <summary>
 /// Creates an instance of a DocumentWithLinks and collects its links and references.
 /// </summary>
 /// <param name="doc">The input HTML document. May not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
 public DocumentWithLinks(HtmlDocument doc)
 {
     if (null == doc)
     {
         throw new ArgumentNullException("doc");
     }

     _doc = doc;

     // Both collection passes run after the document has been stored.
     GetLinks();
     GetReferences();
 }
Пример #27
0
        /// <summary>
        /// Checks the state of a document created with the default constructor: tag "html",
        /// not void, empty, and no head or body.
        /// </summary>
        public void Ctor()
        {
            var document = new HtmlDocument();

            Assert.Equal("html", document.Tag);
            Assert.False(document.IsVoid);
            Assert.True(document.IsEmpty);
            Assert.Null(document.Head);
            Assert.Null(document.Body);
        }
Пример #28
0
 /// <summary>
 /// Appends an <c>img</c> element with the given source and a fixed full-size style to a node.
 /// </summary>
 /// <param name="htmlDoc">Document used to create the element and its attributes.</param>
 /// <param name="new_node">Node that receives the image as a child.</param>
 /// <param name="image_source">Value for the image's <c>src</c> attribute.</param>
 public void AddImageNode(HtmlDocument htmlDoc,HtmlNode new_node,string image_source)
 {
     // Resulting markup: <img src="{image_source}" style="height:100%;width:100%;"/>
     var image = htmlDoc.CreateElement("img");
     image.Attributes.Append(htmlDoc.CreateAttribute("src", image_source));
     image.Attributes.Append(htmlDoc.CreateAttribute("style", "height:100%;width:100%;"));

     new_node.AppendChild(image);
 }
Пример #29
0
        /// <summary>
        /// Loads the HTML file at <paramref name="path"/> and returns the text that <c>ConvertTo</c>
        /// writes for its document node.
        /// </summary>
        /// <param name="path">Path of the HTML file to load.</param>
        /// <returns>The converted text.</returns>
        public string Convert(string path)
        {
            HtmlDocument doc = new HtmlDocument();
            doc.Load(path);

            // StringWriter is disposable; scope it so it is released on every exit path.
            using (StringWriter sw = new StringWriter())
            {
                ConvertTo(doc.DocumentNode, sw);
                sw.Flush();
                return sw.ToString();
            }
        }
Пример #30
0
    /// <summary>
    /// Builds a page description (title, meta description, first image, link) for the first
    /// <c>http://</c> URL found in a text.
    /// </summary>
    /// <param name="str">Text that may contain a URL.</param>
    /// <returns>
    /// <c>null</c> when the text is blank or contains no <c>http://</c> URL; an unpopulated
    /// <c>pageDesc</c> when <c>IsAbsoluteUrl</c> rejects the extracted URL; otherwise the populated description.
    /// </returns>
    public pageDesc getUrlDesc(string str)
    {
        pageDesc pgDesc = new pageDesc();
        str = str.Trim();
        if (str == "")
        {
            return null;
        }

        // Ordinal search keeps IndexOf consistent with Contains, which is always ordinal.
        int schemeIndex = str.IndexOf("http://", StringComparison.Ordinal);
        if (schemeIndex < 0)
        {
            return null;
        }

        // The URL runs from the scheme up to the next space, if there is one.
        string url = str.Substring(schemeIndex);
        int spaceIndex = url.IndexOf(' ');
        if (spaceIndex >= 0)
        {
            url = url.Substring(0, spaceIndex);
        }

        if (!IsAbsoluteUrl(url))
        {
            return pgDesc;
        }

        HtmlDocument doc = new HtmlDocument();
        // Release the client and the response stream once the document is loaded.
        using (System.Net.WebClient wc = new System.Net.WebClient())
        using (var pageStream = wc.OpenRead(url))
        {
            doc.Load(pageStream, true);
        }

        var titleTags = doc.DocumentNode.SelectNodes("//title");
        if (titleTags != null)
        {
            pgDesc.pageTitle = titleTags[0].InnerText;
        }

        // SelectNodes returns null when the page has no <meta> tags at all.
        var metaTags = doc.DocumentNode.SelectNodes("//meta");
        if (metaTags != null)
        {
            foreach (var tag in metaTags)
            {
                var nameAttribute = tag.Attributes["name"];
                var contentAttribute = tag.Attributes["content"];
                if (nameAttribute != null && nameAttribute.Value == "description" && contentAttribute != null)
                {
                    pgDesc.pageContent = contentAttribute.Value;
                }
            }
        }

        // Only the first image is considered; an image without a src counts as "no image".
        string imageUrl = "";
        var imageTags = doc.DocumentNode.SelectNodes("//img");
        var srcAttribute = imageTags != null ? imageTags[0].Attributes["src"] : null;
        if (srcAttribute != null)
        {
            string src = srcAttribute.Value;
            if (src.Contains("http://"))
            {
                imageUrl = src;
            }
            else
            {
                // Relative sources are rebased onto the page's host.
                string hostRoot = "http://" + new Uri(url).Host;
                imageUrl = src.StartsWith("/", StringComparison.Ordinal)
                    ? hostRoot + src
                    : hostRoot + "/" + src;
            }
        }
        pgDesc.pageImg = imageUrl;
        pgDesc.pageLink = url;

        return pgDesc;
    }
Пример #31
0
        /// <summary>
        /// Logs in to Pixiv and stores the resulting cookie string in <c>cookie</c>, prefixed with "pixiv;".
        /// Does nothing when <c>cookie</c> already contains "pixiv" or "token=".
        /// </summary>
        /// <param name="proxy">Proxy passed to both web requests.</param>
        private void Login(IWebProxy proxy)
        {
            // A login is attempted only when the cookie carries neither marker.
            if (!cookie.Contains("pixiv") && !cookie.Contains("token="))
            {
                try
                {
                    HtmlDocument hdoc = new HtmlDocument();

                    cookie = "";
                    string
                        data      = "",
                        post_key  = "",
                        loginpost = "https://accounts.pixiv.net/api/login?lang=zh",
                        loginurl  = "https://accounts.pixiv.net/login?lang=zh&source=pc&view_type=page&ref=";

                    // Pick one of the configured accounts at random.
                    int index = rand.Next(0, user.Length);

                    shc.Referer = Referer;
                    shc.Remove("X-Requested-With");
                    shc.Remove("Accept-Ranges");
                    shc.ContentType = SessionHeadersValue.AcceptTextHtml;

                    // Request 1: fetch the login page and read post_key from its hidden input.
                    data = Sweb.Get(loginurl, proxy, shc);
                    hdoc.LoadHtml(data);
                    post_key = hdoc.DocumentNode.SelectSingleNode("//input[@name='post_key']").Attributes["value"].Value;
                    // A short post_key is logged as a failure, but execution continues.
                    if (post_key.Length < 9)
                    {
                        SiteManager.echoErrLog(SiteName, "自动登录失败 ");
                    }

                    // Request 2: POST the credentials to obtain the login cookie.
                    // NOTE(review): the password expression below appears redacted ("******") in this copy
                    // and is not valid C# as written — restore the original expression before compiling.
                    shc.ContentType = SessionHeadersValue.ContentTypeFormUrlencoded;
                    data            = "pixiv_id=" + user[index]
                                      + "&captcha=&g_recaptcha_response="
                                      + "&password="******"&post_key=" + post_key
                                      + "&source=pc&ref=&return_to=https%3A%2F%2Fwww.pixiv.net%2F";
                    data   = Sweb.Post(loginpost, data, proxy, shc);
                    cookie = Sweb.GetURLCookies(SiteUrl);

                    if (!data.Contains("success"))
                    {
                        SiteManager.echoErrLog(SiteName, "自动登录失败 " + data);
                    }
                    // Reached only when the response contains "success".
                    else if (data.Contains("locked"))
                    {
                        // The exception is thrown and immediately swallowed by the empty catch,
                        // so this branch has no effect visible in this method.
                        try
                        {
                            throw new Exception("登录Pixiv时IP被封锁,剩余时间:" + Regex.Match(data, "lockout_time_by_ip\":\"(\\d+)\"").Groups[1].Value);
                        }
                        catch { }
                    }
                    else if (cookie.Length < 9)
                    {
                        SiteManager.echoErrLog(SiteName, "自动登录失败 ");
                    }
                    else
                    {
                        // Prefix the cookie with "pixiv;" so the check at the top skips the next login.
                        cookie = "pixiv;" + cookie;
                    }
                }
                catch (Exception e)
                {
                    // Any failure in the sequence above is logged and not rethrown.
                    SiteManager.echoErrLog(SiteName, e, "可能无法连接到服务器");
                }
            }
        }
Пример #32
0
        /// <summary>
        /// Uploads a solution bundle to the device's solution installer page and reports the outcome.
        /// </summary>
        /// <param name="sessionId">Session identifier sent as the <c>sessionId</c> cookie.</param>
        /// <param name="bundleFileName">Path of the bundle file to upload.</param>
        /// <returns>
        /// <c>Failed</c> when the installer page is unavailable, the upload request is rejected, the
        /// response's Summary section carries the warning class, or no solutions table is present;
        /// otherwise <c>Passed</c> with the Summary text.
        /// </returns>
        public PluginExecutionResult InstallSolution(string sessionId, string bundleFileName)
        {
            Handler = new HttpClientHandler {
                AllowAutoRedirect = true, UseCookies = false
            };
            using (var client = new HttpClient(Handler))
            {
                client.DefaultRequestHeaders.ExpectContinue = false;
                client.DefaultRequestHeaders.Add("Cookie", $"sessionId={sessionId}");
                client.Timeout = TimeSpan.FromMinutes(10);

                // Find out whether the device supports the bundle installer.
                var solutionInstaller = client.GetAsync(string.Format(CultureInfo.CurrentCulture, OmniDeviceSolutionUrl, Device.Address));
                if (!solutionInstaller.Result.IsSuccessStatusCode)
                {
                    return(new PluginExecutionResult(PluginResult.Failed, "Solution Installer not available"));
                }

                using (var formData = new MultipartFormDataContent())
                {
                    formData.Add(new StringContent(CsrfToken), "\"CSRFToken\"");

                    var fileContent = new StreamContent(File.OpenRead(bundleFileName));
                    fileContent.Headers.Add("Content-Type", "application/octet-stream");
                    fileContent.Headers.Add("Content-Disposition",
                                            "form-data; name=\"bundleFile\"; filename=\"" + Path.GetFileName(bundleFileName) + "\"");

                    formData.Add(fileContent, "bundleFile", Path.GetFileName(bundleFileName));

                    formData.Add(new StringContent("Install"), "\"InstallButton\"");
                    formData.Add(new StringContent("SolutionInstallViewSectionId"), "\"StepBackAnchor\"");
                    formData.Add(new StringContent("SolutionInstallViewSectionId"), "\"jsAnchor\"");

                    var response = client.PostAsync(string.Format(CultureInfo.CurrentCulture, OmniDeviceSolutionSaveUrl, Device.Address), formData).Result;

                    if (!response.IsSuccessStatusCode)
                    {
                        // A completed task carries no exception, so report the HTTP status instead.
                        return(new PluginExecutionResult(PluginResult.Failed, $"Solution install request failed: {(int)response.StatusCode} {response.ReasonPhrase}"));
                    }

                    HtmlDocument doc = new HtmlDocument();
                    doc.LoadHtml(response.Content.ReadAsStringAsync().Result);

                    // The Summary div and its class attribute may be absent; read both defensively.
                    var summaryNode = doc.DocumentNode.SelectSingleNode("//div[@id='Summary']");
                    string summaryText  = summaryNode?.InnerText ?? string.Empty;
                    string summaryClass = summaryNode?.Attributes["class"]?.Value;

                    if (summaryClass == "message message-warning")
                    {
                        return(new PluginExecutionResult(PluginResult.Failed, $"Solution not installed: {summaryText}"));
                    }

                    HtmlNode table = doc.DocumentNode.SelectSingleNode("//table[@id='SolutionsTable']");
                    if (table == null)
                    {
                        return(new PluginExecutionResult(PluginResult.Failed, $"Solution not installed: {summaryText}"));
                    }

                    return(new PluginExecutionResult(PluginResult.Passed, summaryText));
                }
            }
        }
        /// <summary>
        /// Parses the result pages of one query into a list of products.
        /// </summary>
        /// <remarks>
        /// Pages are loaded one after another. A page without any product card is requested again
        /// with the error flag set. Parsing stops once the requested number of positions has been
        /// read on a page, or when the requested page count is reached (a page count of 0 stops
        /// after the first page).
        /// </remarks>
        /// <param name="url">The query URL; it is rebuilt from the request model before loading.</param>
        /// <returns>The products collected so far when a stop condition is met.</returns>
        public List <ProductModel> ParseOneQuery(string url)
        {
            RequestModel rModel = SetRequest(url);

            url = SetRequestUrl(rModel);

            List <ProductModel> items = new List <ProductModel>();

            // Iterate over the pages.
            bool error   = false;
            int  pageNum = 0;

            while (true)
            {
                string pageContent = new NetworkService().LoadPage(url, error);

                HtmlDocument document = new HtmlDocument();
                document.LoadHtml(pageContent);

                // Iterate over the products.
                HtmlNodeCollection cards = document.DocumentNode.SelectNodes("//div[@data-id]");
                if (cards == null)
                {
                    // No product cards: flag the failure and request the same URL again.
                    error = true;
                    continue;
                }

                // Both expressions start with "//", so they match across the whole document no matter
                // which node they are evaluated on; query the document once instead of per card.
                HtmlNodeCollection titles = document.DocumentNode.SelectNodes("//div[contains(@class, 'n-snippet-card2__title')]"),
                                   prices = document.DocumentNode.SelectNodes("//div[contains(@class, 'n-snippet-card2__main-price-wrapper')]");

                // Never index past the shortest list; a missing list yields no items for this page.
                int count = 0;
                if (titles != null && prices != null)
                {
                    count = System.Math.Min(cards.Count, System.Math.Min(titles.Count, prices.Count));
                }

                for (int i = 0; i < count; i++)
                {
                    string price = "";
                    if (prices[i].InnerText != "")
                    {
                        price = prices[i].InnerText.Replace(" ", "");

                        // Drop the last two characters; shorter values are kept as they are.
                        if (price.Length >= 2)
                        {
                            price = price.Substring(0, price.Length - 2);
                        }
                    }

                    string link = titles[i].FirstChild.GetAttributeValue("href", "").Replace("&amp;", "&");
                    if (link.Contains("market-click2"))
                    {
                        // Replace returns a new string, so its result has to be assigned.
                        link = ("http:" + link).Replace(@"\", "/");
                    }
                    else
                    {
                        link = "https://market.yandex.ru" + link;
                    }

                    items.Add(new ProductModel
                    {
                        Name  = titles[i].InnerText,
                        Link  = link,
                        Price = price
                    });

                    error = false;

                    // When a number of positions is given, the page count is ignored.
                    if (rModel.PositionsCount > 0 && i >= (rModel.PositionsCount - 1))
                    {
                        return(items);
                    }
                }

                // Move on to the next page.
                pageNum++;

                if (rModel.PageCount == 0 || rModel.PageCount == pageNum)
                {
                    return(items);
                }

                url = UpdatePageNumInRequestUrl(url, pageNum + 1);
            }
        }
        /// <summary>
        /// Composes the blog: copies the CSS file into the output directory, populates a template
        /// for every blog post, saves the root index page from the first post, and then saves each
        /// post page with its previous/next navigation wired up.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// No root index template was created (there were no posts), or a template carries a
        /// navigation value that is not valid for its position.
        /// </exception>
        public override void Compose()
        {
            #region Copy CSS File to Output directory

            string cssFile = OptionsContext.Current.Options.BlogCSSFile;
            string cssFolder = Path.Combine(OptionsContext.Current.Options.OutputDirectory, "css");
            IOContext.Current.CreateDirectory(cssFolder);
            IOContext.Current.FileCopy(cssFile, Path.Combine(cssFolder, cssFile));

            #endregion

            IReadOnlyCollection <IBasePage> blogPosts = _blogProvider.Pages;
            int index = 1;
            int posts = blogPosts.Count;

            IList<PopulatedTemplate> populatedTemplates = new List<PopulatedTemplate>();
            PopulatedTemplate rootIndexTemplate = null;

            foreach (IBlogPost blogPost in blogPosts)
            {
                // ASSUMPTION: First blog post in the list is the newest, should have been sorted by date in the BlogPostProvider.
                // Only run this code once.
                if (rootIndexTemplate == null)
                {
                    rootIndexTemplate = createRootIndexPopulatedTemplate(blogPost, posts);
                }

                try
                {
                    HtmlDocument template = CopyOfTemplate;

                    addBlogCSS(template, blogPost);
                    replaceBlogDiv(template, blogPost);
                    replaceAllTitles(template, blogPost);
                    replaceAllDates(template, blogPost);
                    replaceAllFragments(template);

                    // The first post only links back, the last post only links forward,
                    // and a single post links nowhere.
                    NavigationButtons buttonsNeeded;

                    if (posts == 1)
                    {
                        buttonsNeeded = NavigationButtons.None;
                    }
                    else
                    {
                        if (index == 1)
                        {
                            buttonsNeeded = NavigationButtons.PreviousOnly;
                        }
                        else if (index == posts)
                        {
                            buttonsNeeded = NavigationButtons.NextOnly;
                        }
                        else
                        {
                            buttonsNeeded = NavigationButtons.Both;
                        }
                    }

                    populatedTemplates.Add(new PopulatedTemplate(blogPost, template, buttonsNeeded, OptionsContext.Current.Options.OutputDirectory));
                }
                catch (Exception e)
                {
                    ErrorWriterContext.Current.WriteLine(Invariant($"Error creating blog post with title {blogPost.Metadata.Title}."));
                    ErrorWriterContext.Current.WriteLine(e.ToString());

                    // Rethrow with "throw;" so the original stack trace is preserved.
                    throw;
                }

                index++;
            }

            #region Root Index page creation

            // Create the main page root index file first
            if (rootIndexTemplate == null)
            {
                throw new InvalidOperationException("Stopped because the main page wasn't going to be created");
            }

            // Only two possible cases:
            //      1. There is only one blog post
            //      2. This is the first blog post of many
            // The first blog post should not need next or both navigation buttons.
            switch (rootIndexTemplate.ButtonsNeeded)
            {
                case NavigationButtons.None:
                    {
                        hideNext(rootIndexTemplate);
                        hidePrevious(rootIndexTemplate);
                    }
                    break;
                case NavigationButtons.PreviousOnly:
                    {
                        // ASSUMPTION: The first index will be the previous page, based on date sorting
                        PopulatedTemplate previousTemplate = populatedTemplates[1];
                        replacePrevious(rootIndexTemplate, previousTemplate.RootRelativePath);
                        hideNext(rootIndexTemplate);
                    }
                    break;
                default:
                    throw new InvalidOperationException(Invariant($"Enum value {rootIndexTemplate.ButtonsNeeded} is not valid for the first blog post"));
            }

            rootIndexTemplate.SaveAsRootIndex();

            #endregion

            // Now create the rest of the blog pages.
            // "Next" is the template at i - 1 and "previous" is the template at i + 1.
            for (int i = 0; i < populatedTemplates.Count; i++)
            {
                PopulatedTemplate currentTemplate = populatedTemplates[i];

                switch (currentTemplate.ButtonsNeeded)
                {
                    case NavigationButtons.None:
                        {
                            hideNext(currentTemplate);
                            hidePrevious(currentTemplate);
                        }
                        break;
                    case NavigationButtons.NextOnly:
                        {
                            PopulatedTemplate nextTemplate = populatedTemplates[i - 1];
                            replaceNext(currentTemplate, nextTemplate.RelativePath);
                            hidePrevious(currentTemplate);
                        }
                        break;
                    case NavigationButtons.PreviousOnly:
                        {
                            PopulatedTemplate previousTemplate = populatedTemplates[i + 1];
                            replacePrevious(currentTemplate, previousTemplate.RelativePath);
                            hideNext(currentTemplate);
                        }
                        break;
                    case NavigationButtons.Both:
                        {
                            PopulatedTemplate previousTemplate = populatedTemplates[i + 1];
                            replacePrevious(currentTemplate, previousTemplate.RelativePath);

                            PopulatedTemplate nextTemplate = populatedTemplates[i - 1];
                            replaceNext(currentTemplate, nextTemplate.RelativePath);
                        }
                        break;
                    default:
                        throw new InvalidOperationException(Invariant($"Enum value {currentTemplate.ButtonsNeeded} not supported"));
                }

                currentTemplate.Save();
            }
        }
Пример #35
0
        /// <summary>
        /// Returns the largest <c>Depth()</c> among the nodes returned by <c>GetNodes()</c>.
        /// </summary>
        /// <param name="doc">The document whose nodes are measured.</param>
        /// <returns>The maximum depth, or 0 when the document yields no nodes.</returns>
        public static int MaxDepth(this HtmlDocument doc)
        {
            // Max is a single pass; sorting every depth just to take the first one is not needed.
            // DefaultIfEmpty keeps the result at 0 for an empty sequence.
            return doc.GetNodes()
                      .Select(node => node.Depth())
                      .DefaultIfEmpty()
                      .Max();
        }
Пример #36
0
        /// <summary>
        /// Downloads the NEU HDTV home page and builds one <see cref="ProgramSource"/> per channel cell found in it.
        /// </summary>
        /// <returns>
        /// The channels that could be parsed. Any exception is logged and the entries collected so far are returned.
        /// </returns>
        protected override async Task <IEnumerable <ProgramSource> > GetNewChannelList()
        {
            var result = new List <ProgramSource>();

            try
            {
                HtmlDocument doc = new HtmlDocument();

                // Release the client and the response stream as soon as the page has been parsed.
                using (HttpClient client = new HttpClient())
                using (var buffer = await client.GetStreamAsync("http://hdtv.neu6.edu.cn/"))
                {
                    doc.Load(buffer);
                }

                var channelNodes = doc.DocumentNode.SelectNodes("//div[@class='entry-content']/table/tr/td");
                if (channelNodes == null)
                {
                    // SelectNodes returns null (not an empty collection) when nothing matches.
                    LoggingService.Debug("Television", "[NEU] no channel cells found in page", Windows.Foundation.Diagnostics.LoggingLevel.Error);
                    return(result);
                }

                foreach (var chNode in channelNodes)
                {
                    if (chNode.ChildNodes.Count <= 2)
                    {
                        continue;
                    }

                    var linkNode = chNode.ChildNodes["a"];
                    if (linkNode == null)
                    {
                        // Cell without a link: nothing to derive a channel code from.
                        continue;
                    }

                    string chName = chNode.FirstChild.InnerText.Trim();
                    string chLink = linkNode.GetAttributeValue("href", string.Empty);
                    // The channel code is whatever follows the last '=' of the link.
                    string chCode = chLink.Substring(chLink.LastIndexOf('=') + 1);

                    string chCodeUnified = chCode;
                    if (chCode.StartsWith("jlu", StringComparison.Ordinal))
                    {
                        // Drop the first four characters; a code shorter than that is kept unchanged
                        // instead of letting Substring throw.
                        if (chCode.Length >= 4)
                        {
                            chCodeUnified = chCode.Substring(4);
                        }
                    }
                    else if (chCode.StartsWith("hls", StringComparison.Ordinal))
                    {
                        // Compute the channel ID from the display name.
                        string editName = chName.Replace("+", "").Replace("-", "");
                        if (chName.EndsWith("卫视高清", StringComparison.Ordinal))
                        {
                            editName = editName.Replace("卫视高清", "hd");
                        }
                        else
                        {
                            editName = editName.Replace("卫视", "tv");
                            editName = editName.Replace("高清", "hd");
                        }
                        chCodeUnified = SuperEncoding.GetSpellCode(editName, false);
                    }

                    Channel channel = Channel.GetChannel(chCodeUnified, chName);
                    result.Add(new ProgramSource()
                    {
                        IsThumbAvaliable = true,
                        // Small thumbnail: http://hdtv.neu6.edu.cn/wall/img/{chCode}_s.png
                        ThumbImage       = new Uri($"http://hdtv.neu6.edu.cn/wall/img/{chCode}.png"),
                        MediaSource      = new Uri($"http://media2.neu6.edu.cn/hls/{chCode}.m3u8"),
                        MediaSourceTag   = chCode,
                        IsMediaAvaliable = true,
                        SourceStation    = this,
                        ProgramInfo      = new Program()
                        {
                            Name    = chName,
                            Channel = channel
                        }
                    });
                    // ",10" pads to a width of 10; the previous ":10" was a format specifier, which strings ignore.
                    LoggingService.Debug("Television", $"[NEU]{chName,10} : {chCodeUnified,10}");
                }
            }
            catch (Exception e)
            {
                LoggingService.Debug("Television", e.Message, Windows.Foundation.Diagnostics.LoggingLevel.Error);
                // Only break into the debugger when one is actually attached.
                if (System.Diagnostics.Debugger.IsAttached)
                {
                    System.Diagnostics.Debugger.Break();
                }
            }
            return(result);
        }
Пример #37
0
 /// <summary>
 /// Creates a comment node by forwarding <see cref="HtmlNodeType.Comment"/>, the owner document
 /// and the index to the base constructor.
 /// </summary>
 /// <param name="ownerdocument">Document passed through to the base constructor.</param>
 /// <param name="index">Index passed through to the base constructor.</param>
 internal HtmlCommentNode(HtmlDocument ownerdocument, int index)
     : base(HtmlNodeType.Comment, ownerdocument, index)
 {
 }
 /// <summary>
 /// Returns the HTML-decoded text of the first <c>title</c> element of the document.
 /// </summary>
 /// <param name="htmldocument">The parsed document to search.</param>
 /// <returns>
 /// The decoded title text, or <c>null</c> when the document contains no <c>title</c> element.
 /// </returns>
 public static string SearchPageTitle(this HtmlDocument htmldocument)
 {
      var titleNode = htmldocument.DocumentNode.SelectSingleNode("//title");
      if (titleNode == null)
      {
          // SelectSingleNode yields null when nothing matches; report "no title"
          // instead of failing with a NullReferenceException.
          return null;
      }

      return HttpUtility.HtmlDecode(titleNode.InnerText);
 }
Пример #39
0
        /// <summary>
        /// Extracts the category hierarchy from a category page.
        /// </summary>
        /// <param name="strHtml">HTML source of the page.</param>
        /// <returns>
        /// One level-1 entry (from <c>#category-first</c>), one level-2 entry (from <c>#category-second</c>)
        /// and one level-3 entry per <c>dd/a</c> link under <c>#category-third</c>, each only when the
        /// corresponding element exists. Empty when none of them is present.
        /// </returns>
        public List <Category> GetAllCategoryList(string strHtml)
        {
            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(strHtml);

            HtmlNode oneNode   = doc.DocumentNode.SelectSingleNode("//span[@id='category-first']");
            HtmlNode twoNode   = doc.DocumentNode.SelectSingleNode("//dl[@id='category-second']");
            HtmlNode threeNode = doc.DocumentNode.SelectSingleNode("//*[@id='category-third']");

            // Resolve the parent codes once. A missing parent element yields an empty parent code
            // instead of a NullReferenceException.
            // XPaths evaluated on a node start with "." so they stay inside that node; a leading "//"
            // would search the whole document.
            string oneCode = oneNode != null
                ? oneNode.Attributes["data-code"].Value
                : string.Empty;
            string twoCode = twoNode != null
                ? twoNode.SelectSingleNode(".//dd").Attributes["modelid"].Value
                : string.Empty;

            #region 1.0 Build the category list
            List <Category> category = new List <Category>();
            if (oneNode != null)
            {
                // Level-1 menu
                category.Add(new Category()
                {
                    CategoryLevel = 1,
                    Code          = oneCode,
                    Name          = oneNode.InnerText,
                    ParentCode    = "",
                    Url           = "",
                    State         = 0
                });
            }
            if (twoNode != null)
            {
                // Level-2 menu
                category.Add(new Category()
                {
                    CategoryLevel = 2,
                    Code          = twoCode,
                    Name          = twoNode.SelectSingleNode(".//dt").InnerText,
                    ParentCode    = oneCode,
                    Url           = "",
                    State         = 0
                });
            }
            if (threeNode != null)
            {
                // SelectNodes returns null when there are no links; treat that as "no leaf categories".
                var leafNodes = threeNode.SelectNodes(".//dd/a");
                if (leafNodes != null)
                {
                    foreach (var item in leafNodes)
                    {
                        // Leaf menu entries (stored with CategoryLevel 3)
                        category.Add(new Category()
                        {
                            CategoryLevel = 3,
                            Code          = item.Attributes["data-code"].Value,
                            Name          = item.InnerText,
                            ParentCode    = twoCode,
                            Url           = "http:" + item.Attributes["href"].Value,
                            State         = 0
                        });
                    }
                }
            }
            #endregion

            return(category);
        }
Пример #40
0
        /// <summary>
        /// Runs the 1688.com crawl: for every keyword in <c>base.keywordInfList</c> it opens the home page,
        /// requests the search page for the keyword and follows the result pages returned by
        /// <c>GetData</c> / <c>GetData1</c>. Progress is reported through <c>base.updateTextBox</c>.
        /// Any exception ends the run and is written to the log.
        /// </summary>
        public override void Excute()
        {
            //TODO: implement the excute function
            try
            {
                // Taken once, before the keyword loop: the elapsed time reported per keyword below is
                // therefore measured from the start of the whole run.
                DateTime begin = DateTime.Now;
                string url = string.Empty;
                string refererUrl = string.Empty;
                string text = string.Empty;
                string s = string.Empty;
                int num = 0; // number of keywords counted as "no data retrieved" (reported in the final summary)
                base.IniDataTable();
                // 0x4e20 == 20000; presumably a timeout in milliseconds — TODO confirm against HttpClientHelper.
                base.http = new DoNet4.Utilities.HttpClientHelper(0x4e20);

                for (int i = 0; i < base.keywordInfList.Count; i++)
                { // iterate over the keyword list
                    base.keywordInf = base.keywordInfList[i];
                    if (string.IsNullOrEmpty(base.keywordInf.keyword))
                    {
                        continue;
                    }
                    base.updateTextBox(base.keywordInf.keyword + " 开始查询", true);
                    url = "http://1688.com";
                    byte retry = 0;
                    // Load the home page, at most 3 attempts; jump to the search step as soon as the
                    // expected page-title text is found in the response.
                    while (true)
                    {
                        if (base.http.Get(url).IndexOf("阿里巴巴1688.com - 全球领先的采购批发平台") != -1)
                        {
                            goto Label_search_taobao;
                        }
                        retry = (byte)(retry + 1);
                        if (retry >= 3) { break; }
                        base.updateTextBox("阿里巴巴主页打开失败,重试: " + retry.ToString(), true);
                    }
                    // Only reached when all attempts failed: count the keyword and move on to the next one.
                    num++;
                    continue;
                    Label_search_taobao:
                    // 0x7d0 == 2000: pause two seconds before issuing the search request.
                    Thread.Sleep(0x7d0);
                    System.Text.Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
                    // The keyword is URL-encoded as GB2312 and the encoded text is upper-cased.
                    url = "http://s.1688.com/selloffer/offer_search.htm?keywords=" + HttpUtility.UrlEncode(base.keywordInf.keyword, gb2312).ToUpper();
                    text = base.http.Get(url);
                    refererUrl = url;
                    if (text.Contains("没找到与"))
                    {
                        // "Nothing found" page.
                        num++;
                        base.updateTextBox(base.keywordInf.keyword + " 没有找到相关商品", true);
                        Thread.Sleep(200);
                    }
                    else if (text.Contains("淘宝会员(仅限会员名)请在此登录")) {
                        // Login page was served instead of results. NOTE(review): num is not incremented here.
                        base.updateTextBox(base.keywordInf.keyword + " 阿里巴巴要求登录", true);
                        Thread.Sleep(200);
                    }
                    else
                    {
                        HtmlDocument document = new HtmlDocument();
                        document.LoadHtml(text);
                        HtmlNode node = null;
                        int result = 0;
                        node = document.DocumentNode.SelectSingleNode("//span[@class='sm-widget-offer']");
                        if (node == null)
                        {
                            base.updateTextBox(base.keywordInf.keyword + " node为空(null)!", true);
                        }
                        else
                        {
                            // NOTE(review): the value parsed here is overwritten by the second TryParse below,
                            // so this branch only affects the message and num. If both parses fail, num is
                            // incremented twice for the same keyword, and both messages end with "- 2".
                            s = StrUnit.MidStrEx(node.InnerHtml, "<em>", "</em>");
                            if (!int.TryParse(s, out result))
                            {
                                base.updateTextBox(base.keywordInf.keyword + " 获取商品数量错误! - 2", true);
                                num++;
                            }
                        }

                        // Item count taken from the raw page text.
                        s = StrUnit.MidStrEx(text, "共<em>", "</em>件");

                        if (!int.TryParse(s, out result))
                        {
                            base.updateTextBox(base.keywordInf.keyword + " 获取商品数量错误! - 2", true);
                            num++;
                        }
                        else
                        { // a count was found: walk through the result pages
                            base.updateTextBox("共检索到" + s + " 件相关商品, 开始爬取", true);
                            // The loop runs while a non-empty next-page URL is returned.
                            // NOTE(review): the first page goes through GetData, every following page through
                            // GetData1 — confirm that this asymmetry is intended.
                            for (string str6 = this.GetData(text); !string.IsNullOrEmpty(str6); str6 = this.GetData1(text))
                            {
                                Thread.Sleep(0x7d0);
                                text = base.http.Get(str6, refererUrl);
                            }
                            TimeSpan span = (TimeSpan)(DateTime.Now - begin);
                            base.updateTextBox(string.Concat(new object[] { base.keywordInf.keyword, " 获取完毕,耗时:", span.TotalSeconds, "秒" }), true);
                        }

                    }
                }
                // After the loop base.keywordInf is whatever was assigned last (it is not reassigned when the list is empty).
                base.updateTextBox(base.keywordInf.keyword + " 已到达规定页数,结束", true);
                base.updateTextBox("共 " + base.keywordInfList.Count.ToString() + " 件商品查询完毕,其中 " + num.ToString() + "件未检索到数据", true);
                // NOTE(review): these two statements are skipped when an exception is thrown above.
                base.http.Free();
                base.Stoped = true;
            }
            catch (Exception e)
            {
                // Any failure ends the run; only the message and stack trace are logged.
                Log.WriteLog("taobaoTh Excute Err:" + e.Message + " ,Err Stack:" + e.StackTrace);
            }
        }
Пример #41
0
        /// <summary>
        /// Parses one search-result page, appends one row per ranked offer to <c>base.OutDataTable</c>
        /// and returns the URL of the next result page.
        /// </summary>
        /// <param name="text">HTML of the result page.</param>
        /// <returns>
        /// The URL of the next page, or an empty string once <c>page</c> has reached
        /// <c>base.keywordInf.endPage</c> or when anything goes wrong while parsing.
        /// </returns>
        private string GetData1(string text)
        {
            innerHtml = string.Empty;

            try
            {
                HtmlDocument resultPage = new HtmlDocument();
                resultPage.LoadHtml(text);

                HtmlNode offerList = resultPage.DocumentNode.SelectSingleNode("//ul[@id='sm-offer-list']");
                if (offerList == null)
                {
                    base.updateTextBox("获取商品列表失败", true);
                    // Caught by the handler at the bottom, which reports the message and returns "".
                    throw new Exception("获取商品列表失败");
                }

                // The list markup is cut into one fragment per "<li t..." item.
                string[] fragments = offerList.InnerHtml.Split(new string[] { "<li t" }, StringSplitOptions.RemoveEmptyEntries);
                for (int k = 0; k < fragments.Length; k++)
                {
                    string fragment = fragments[k];
                    if (fragment.IndexOf("-rank") == -1)
                    {
                        // Only fragments containing "-rank" are processed.
                        continue;
                    }

                    string offerHtml = StrUnit.MidStrEx(fragment, "rank", "</li>");

                    DataRow offerRow = base.OutDataTable.NewRow();
                    offerRow["Platform"] = "1688.com";
                    offerRow["Keyword"] = base.keywordInf.keyword;
                    offerRow["ItemID"] = StrUnit.MidStrEx(offerHtml, "t-offer-id=\"", "\"");
                    offerRow["SellerId"] = StrUnit.MidStrEx(offerHtml, "t-member-id=\"", "\"");

                    string photoHtml = StrUnit.MidStrEx(offerHtml, "sm-offer-photo sw-dpl-offer-photo", "</div>");
                    offerRow["Url"] = StrUnit.MidStrEx(photoHtml, "href=\"", "\"");
                    offerRow["Title"] = StrUnit.MidStrEx(photoHtml, "title=\"", "\"");

                    string priceHtml = StrUnit.MidStrEx(offerHtml, "<div class=\"s-widget-offershopwindowprice sm-offer-price sw-dpl-offer-price\">", "</div>");
                    offerRow["Price"] = StrUnit.MidStrEx(priceHtml, "title=\"&yen;", "\"");

                    string companyHtml = StrUnit.MidStrEx(offerHtml, "<div class=\"s-widget-offershopwindowcompanyinfo sm-offer-company sw-dpl-offer-company\">", "</div>");
                    offerRow["StoreName"] = StrUnit.MidStrEx(companyHtml, "title=\"", "\"");
                    offerRow["StoreUrl"] = StrUnit.MidStrEx(companyHtml, "href=\"", "\"");

                    base.OutDataTable.Rows.Add(offerRow);
                    Thread.Sleep(50);

                    // NOTE(review): the row is already in the table when this check runs, so a row with an
                    // empty ItemID stays in the table even though the page is then abandoned.
                    if (string.IsNullOrEmpty(offerRow["ItemID"].ToString()))
                    {
                        throw new Exception("获取商品列表数据失败! - 1");
                    }
                }

                // Paging is driven by the page counter: report the finished page, stop at endPage,
                // otherwise advance and build the URL of the next page.
                base.updateTextBox(base.keywordInf.keyword + " 第 " + (page) + " 页获取完成", true);
                if (page >= base.keywordInf.endPage)
                {
                    return string.Empty;
                }

                page++;
                System.Text.Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
                return "https://s.1688.com/selloffer/offer_search.htm?keywords=" + HttpUtility.UrlEncode(base.keywordInf.keyword, gb2312).ToUpper() + "&beginPage=" + page;
            }
            catch (Exception e)
            {
                base.updateTextBox("GetData Err:" + e.Message, true);
            }

            return string.Empty;
        }
Пример #42
0
        /// <summary>
        /// Visits every category page listed in <c>docUrls</c> (entries of the form <c>category*url</c>),
        /// finds the links whose href contains ".pdf", works out the meeting date of each link and hands
        /// the link to <c>ExtractADoc</c> when the date is not earlier than <c>dtStartFrom</c>.
        /// </summary>
        /// <remarks>
        /// The date is read from the link text as <c>d-m-yyyy</c> digits. When that fails and the text of
        /// the parent node mentions a year in range, a long date ("Month d, yyyy") is looked up there instead.
        /// Links without a usable date are skipped.
        /// </remarks>
        public void DownloadCouncilPdfFiles()
        {
            var     docs    = this.LoadDocumentsDoneSQL();
            var     queries = this.LoadQueriesDoneSQL();
            HtmlWeb web     = new HtmlWeb();

            // Two separate patterns. Previously the fallback pattern was assigned to the same variable as
            // the numeric one, so after the first fallback numeric dates were never recognised again.
            Regex numericDateReg = new Regex("[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}");
            Regex longDateReg    = new Regex("[a-zA-Z]+[\\s]{0,2}[0-9]{1,2},[\\s]{0,2}[0-9]{4}");

            List <string> years = new List <string>();
            for (int i = this.dtStartFrom.Year; i <= DateTime.Now.Year; i++)
            {
                years.Add(i.ToString());
            }

            string yearRegText = string.Format("({0})", string.Join("|", years));
            Regex  yearReg     = new Regex(yearRegText);

            // The client is disposed once all categories have been processed.
            using (WebClient c = new WebClient())
            {
                foreach (string url in this.docUrls)
                {
                    string[]           urlParts    = url.Split('*');
                    string             category    = urlParts[0];
                    string             categoryUrl = urlParts[1];
                    HtmlDocument       doc         = web.Load(categoryUrl);
                    HtmlNodeCollection fileNodes   = doc.DocumentNode.SelectNodes("//a[contains(@href,'.pdf')]");

                    if (fileNodes == null)
                    {
                        continue;
                    }

                    foreach (HtmlNode fileNode in fileNodes)
                    {
                        string fileUrl = fileNode.Attributes["href"].Value;
                        fileUrl = fileUrl.StartsWith("http", StringComparison.Ordinal) ? fileUrl : this.cityEntity.CityUrl + fileUrl;

                        // Trim line breaks, tabs, spaces (32) and non-breaking spaces (160).
                        string nodeText = System.Web.HttpUtility.HtmlDecode(fileNode.InnerText).Trim('\r', '\n', '\t', (char)32, (char)160);
                        if (string.IsNullOrEmpty(nodeText))
                        {
                            continue;
                        }

                        string meetingDateText = numericDateReg.Match(nodeText).ToString();
                        if (string.IsNullOrEmpty(meetingDateText) && yearReg.IsMatch(fileNode.ParentNode.InnerText))
                        {
                            meetingDateText = longDateReg.Match(fileNode.ParentNode.InnerText).ToString();
                        }

                        if (string.IsNullOrEmpty(meetingDateText))
                        {
                            continue;
                        }

                        // The patterns also accept impossible dates (e.g. 13-45-2020); skip those instead of
                        // aborting the whole download run with a FormatException.
                        DateTime meetingDate;
                        if (!DateTime.TryParse(meetingDateText, out meetingDate))
                        {
                            Console.WriteLine("Unparseable date '{0}', skip...", meetingDateText);
                            continue;
                        }

                        if (meetingDate < this.dtStartFrom)
                        {
                            Console.WriteLine("Too early, skip...");
                            continue;
                        }

                        this.ExtractADoc(c, fileUrl, category, "pdf", meetingDate, ref docs, ref queries);
                    }
                }
            }
        }
Пример #43
0
 /// <summary>
 /// Gets the anchors of the HTML document as a sequence of <see cref="Anchor"/>, taking the base URI
 /// from a response.
 /// </summary>
 /// <param name="html">The parsed HTML document to read anchors from.</param>
 /// <param name="response">The response the HTML originates from. Its <c>BaseUri</c> is forwarded so that
 /// relative links can be converted to absolute links; when the response is <c>null</c>, <c>null</c> is forwarded.</param>
 /// <returns>The sequence returned by the <c>Anchors</c> overload that takes the base URI.</returns>
 public static IEnumerable<Anchor> Anchors(this HtmlDocument html, Response response)
 {
   var baseUri = response == null ? null : response.BaseUri;
   return Anchors(html, baseUri);
 }
Пример #44
0
 /// <summary>
 /// Returns the HTML-decoded inner markup of the first link whose href contains "/genre/".
 /// </summary>
 /// <param name="htmlDoc">The parsed page to search.</param>
 /// <returns>The decoded content of the first matching <c>a</c> element.</returns>
 /// <exception cref="System.InvalidOperationException">No link with "/genre/" in its href exists.</exception>
 private static string GetGenre(HtmlDocument htmlDoc)
 {
     var genreLink = htmlDoc.DocumentNode
                            .Descendants("a")
                            .First(anchor => anchor.GetAttributeValue("href", "").Contains("/genre/"));

     return WebUtility.HtmlDecode(genreLink.InnerHtml);
 }
Пример #45
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> and appends what it finds to the shared lists:
        /// day descriptions, dates, the city name (once per date), maximum and minimum temperatures.
        /// A message is written to the console for every part that cannot be found.
        /// </summary>
        /// <param name="url">Address of the page to scrape.</param>
        static void GetSpecificData(string url)
        {
            Console.WriteLine($"обрабатываю {url}");
            HtmlWeb      loader = new HtmlWeb();
            HtmlDocument page   = loader.Load(url);
            HtmlNode     root   = page.DocumentNode;

            // Descriptions: the data-text attribute of every day cell.
            HtmlNodeCollection descriptionCells = root.SelectNodes("//div[@class='weather-cells']/div[@data-text]");
            if (descriptionCells == null)
            {
                Console.WriteLine("Ошибка: пустая коллекция DescriptionNodes");
            }
            else
            {
                for (int i = 0; i < descriptionCells.Count; i++)
                {
                    descriptions.Add(descriptionCells[i].Attributes["data-text"].Value);
                }
            }

            // Dates: a label made of two space-separated parts carries its own month; any other label
            // gets the most recently seen month appended.
            HtmlNodeCollection dateCells = root.SelectNodes("//div[@class='weather-cells']/div[@data-text]/div[1]/div[1]");
            if (dateCells == null)
            {
                Console.WriteLine("Ошибка: пустая коллекция DateNodes");
            }
            else
            {
                string lastMonth = "SomeMonth";
                foreach (HtmlNode cell in dateCells)
                {
                    string   label = cell.InnerText.Trim();
                    string[] parts = label.Split(" ");
                    if (parts.Length == 2)
                    {
                        lastMonth = parts[1];
                        dates.Add(label);
                    }
                    else
                    {
                        dates.Add(label + " " + lastMonth);
                    }
                }

                // City name: added once per date so the lists keep the same length.
                HtmlNode titleNode = root.SelectSingleNode("//div[@class='pageinfo_title index-h1']//h1");
                if (titleNode == null)
                {
                    Console.WriteLine("Ошибка: NameNode не обнаружен");
                }
                else
                {
                    foreach (HtmlNode unused in dateCells)
                    {
                        names.Add(titleNode.InnerText.Replace(" на месяц", ""));
                    }
                }
            }

            // Maximum temperatures; the "&minus;" entity is replaced by a plain hyphen.
            HtmlNodeCollection maxCells = root.SelectNodes("//div[@class='weather-cells']/div[@data-text]//div[@class='temp_max js_meas_container']//span[@class='value unit unit_temperature_c']");
            if (maxCells == null)
            {
                Console.WriteLine("Ошибка: пустая коллекция MaxTemperatureNodes");
            }
            else
            {
                for (int i = 0; i < maxCells.Count; i++)
                {
                    maxTemperatures.Add(maxCells[i].InnerText.Trim().Replace("&minus;", "-"));
                }
            }

            // Minimum temperatures, same treatment.
            HtmlNodeCollection minCells = root.SelectNodes("//div[@class='weather-cells']/div[@data-text]//div[@class='temp_min js_meas_container']//span[@class='value unit unit_temperature_c']");
            if (minCells == null)
            {
                Console.WriteLine("Ошибка: пустая коллекция MinTemperatureNodes");
            }
            else
            {
                for (int i = 0; i < minCells.Count; i++)
                {
                    minTemperatures.Add(minCells[i].InnerText.Trim().Replace("&minus;", "-"));
                }
            }

            // Short pause after each page.
            Thread.Sleep(100);
        }
Пример #46
0
        /// <summary>
        /// Handler for the browser's DocumentCompleted event: replaces the body markup of the loaded
        /// document with <c>LicenceText</c>.
        /// </summary>
        /// <param name="sender">Not used.</param>
        /// <param name="e">Not used.</param>
        private void webBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            this.webBrowser.Document.Body.InnerHtml = this.LicenceText;
        }
Пример #47
0
        /// <summary>
        /// Scrapes the IMDb "coming soon" page and returns the listed movies as JSON.
        /// </summary>
        /// <returns>
        /// A JSON result with one <see cref="Movie"/> per list entry that has a title link and a cast list.
        /// Entries without either are skipped. The list is empty when the page layout does not match.
        /// </returns>
        public IHttpActionResult Get()
        {
            List <Movie> movies = new List <Movie>();

            var     url     = "http://www.imdb.com/movies-coming-soon/?ref_=nv_mv_cs_4";
            HtmlWeb web     = new HtmlWeb();
            var     htmlDoc = web.Load(url);

            var liste = htmlDoc.DocumentNode.SelectNodes("//div[@class='list detail']/div");
            if (liste == null)
            {
                // SelectNodes returns null (not an empty collection) when nothing matches.
                return(Json(movies));
            }

            foreach (var item in liste)
            {
                // Each entry is parsed as its own document so the absolute "//" XPaths below
                // only see this entry.
                var htmlSubDoc = new HtmlDocument();
                htmlSubDoc.LoadHtml(item.InnerHtml);
                var entry = htmlSubDoc.DocumentNode;

                var nameNode  = entry.SelectSingleNode("//h4[@itemprop='name']/a");
                var starNodes = entry.SelectNodes("//div[@class='txt-block']/span[@itemprop='actors']/span/a");
                if (nameNode == null || starNodes == null)
                {
                    // Entries without a cast list were already skipped (through a caught exception);
                    // entries without a title are skipped too instead of failing the whole request.
                    continue;
                }

                var imageNode       = entry.SelectSingleNode("//img[@class='poster shadowed']");
                var imageSource     = imageNode != null ? imageNode.Attributes["src"] : null;
                var descriptionNode = entry.SelectSingleNode("//div[@itemprop='description']");
                var durationNode    = entry.SelectSingleNode("//time");

                Movie movie = new Movie();
                movie.MovieName   = nameNode.InnerHtml;
                movie.ImageUrl    = imageSource != null ? imageSource.Value : null;
                movie.Description = descriptionNode != null ? descriptionNode.InnerHtml : null;
                movie.Duration    = durationNode != null ? durationNode.InnerHtml : ":(";

                #region Stars
                List <Star> stars = new List <Star>();
                foreach (var str in starNodes)
                {
                    stars.Add(new Star { FullName = str.InnerHtml });
                }
                movie.Stars = stars;
                #endregion

                #region Directors
                var directorNodes = entry.SelectNodes("//div[@class='txt-block']/span[@itemprop='director']/span/a");

                List <Director> directors = new List <Director>();
                if (directorNodes != null)
                {
                    foreach (var drc in directorNodes)
                    {
                        directors.Add(new Director { FullName = drc.InnerHtml });
                    }
                }
                movie.Directors = directors;
                #endregion

                #region Genre
                var genreNodes = entry.SelectNodes("//span[@itemprop='genre']");

                List <Genre> genres = new List <Genre>();
                if (genreNodes != null)
                {
                    foreach (var gnr in genreNodes)
                    {
                        genres.Add(new Genre { Name = gnr.InnerHtml });
                    }
                }
                movie.Genre = genres;
                #endregion

                movies.Add(movie);
            }

            return(Json(movies));
        }
Пример #48
0
        /// <summary>
        /// Builds the tooltip element of a period: a title panel (place, plus the period itself when
        /// there is a single range to show), a body panel with the RP entries, and a closing "te" div.
        /// </summary>
        /// <param name="doc">Document used to create the nodes.</param>
        /// <param name="periode">Period the tooltip describes.</param>
        /// <param name="options">Rendering options (<c>DisplayByYear</c>, <c>CircleWidth</c>).</param>
        /// <returns>The tooltip node, positioned relative to <c>periode.Position</c>.</returns>
        private HtmlNode GenerateTooltip(HtmlDocument doc, Periode periode, Options options)
        {
            // Default offsets are (-50, 10); a different offset is used past x = 400 and past y = 300.
            int offsetX = periode.Position.X > 400 ? -245 : -50;
            int offsetY = periode.Position.Y > 300
                ? -(180 + (options.CircleWidth + borderWidth) / 2)
                : 10;

            string placement = string.Format("left:{0}px;top:{1}px;", periode.Position.X + offsetX, periode.Position.Y + offsetY);
            var container = CreateDiv(doc, "tt", placement);

            var header = CreateDiv(doc, "p-t b-b", string.Empty);

            var placeLabel = doc.CreateElement("span");
            placeLabel.AddClass("lieu b-b");
            placeLabel.AppendChild(doc.CreateTextNode(periode.Lieu));

            var body = CreateDiv(doc, "p-b-w", string.Empty);

            if (!options.DisplayByYear)
            {
                foreach (var rp in periode.Rps)
                {
                    GenerateRpNode(doc, body, rp, periode.Rps.Count == 1);
                }
            }
            else
            {
                foreach (var yearSlice in periode.SubPeriodes)
                {
                    // A heading per sub-period is only added when there are several of them.
                    if (periode.SubPeriodes.Count > 1)
                    {
                        body.AppendChild(CreatePeriodePeriode(doc, yearSlice));
                    }

                    foreach (var rp in yearSlice.Rps)
                    {
                        bool isOnlyEntry = periode.SubPeriodes.Count == 1 && yearSlice.Rps.Count == 1;
                        GenerateRpNode(doc, body, rp, isOnlyEntry);
                    }
                }
            }

            header.AppendChild(placeLabel);

            // SubPeriodes is only consulted when displaying by year (short-circuit).
            bool showWholePeriod = !options.DisplayByYear || periode.SubPeriodes.Count == 1;
            if (showWholePeriod)
            {
                placeLabel.AppendChild(doc.CreateElement("br"));
                header.AppendChild(CreatePeriodePeriode(doc, periode));
            }

            container.AppendChild(header);
            container.AppendChild(body);
            container.AppendChild(CreateDiv(doc, "te", string.Empty));
            return container;
        }
Пример #49
0
        /*********
        ** Private methods
        *********/
        /// <summary>Get metadata about a mod by scraping the Nexus website.</summary>
        /// <param name="id">The Nexus mod ID.</param>
        /// <returns>
        /// Returns the mod info if found, else <c>null</c> (HTTP 404 or a "not found" site notice).
        /// Any other site notice is returned as a <see cref="NexusMod"/> carrying the error text and status.
        /// </returns>
        private async Task <NexusMod> GetModFromWebsiteAsync(uint id)
        {
            // fetch HTML
            string html;

            try
            {
                html = await this.WebClient
                       .GetAsync(string.Format(this.WebModScrapeUrlFormat, id))
                       .AsString();
            }
            catch (ApiException ex) when(ex.Status == HttpStatusCode.NotFound)
            {
                return(null);
            }

            // parse HTML
            var doc = new HtmlDocument();

            doc.LoadHtml(html);

            // handle Nexus error message
            HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[contains(@class, 'site-notice')][contains(@class, 'warning')]");

            if (node != null)
            {
                // first line = error code, remainder = error text; a notice without text yields no parts at all
                string[] errorParts = node.InnerText.Trim().Split(new[] { '\n' }, 2, System.StringSplitOptions.RemoveEmptyEntries);
                string   errorCode  = errorParts.Length > 0 ? errorParts[0] : string.Empty;
                string   errorText  = errorParts.Length > 1 ? errorParts[1] : null;

                // culture-independent, case-insensitive comparison
                if (string.Equals(errorCode.Trim(), "not found", System.StringComparison.OrdinalIgnoreCase))
                {
                    return(null);
                }

                return(new NexusMod {
                    Error = $"Nexus error: {errorCode} ({errorText}).", Status = this.GetWebStatus(errorCode)
                });
            }

            // extract mod info
            string url     = this.GetModUrl(id);
            string name    = doc.DocumentNode.SelectSingleNode("//div[@id='pagetitle']//h1")?.InnerText.Trim();
            string version = doc.DocumentNode.SelectSingleNode("//ul[contains(@class, 'stats')]//li[@class='stat-version']//div[@class='stat']")?.InnerText.Trim();

            SemanticVersion.TryParse(version, out ISemanticVersion parsedVersion);

            // extract files
            var downloads = new List <IModDownload>();

            // SelectNodes returns null (not an empty collection) when the page has no file sections
            var fileSections = doc.DocumentNode.SelectNodes("//div[contains(@class, 'files-tabs')]");
            if (fileSections != null)
            {
                foreach (var fileSection in fileSections)
                {
                    // a section without a heading is treated like any other unrecognised section
                    string sectionName = fileSection.Descendants("h2").FirstOrDefault()?.InnerText;
                    if (sectionName != "Main files" && sectionName != "Optional files")
                    {
                        continue;
                    }

                    foreach (var container in fileSection.Descendants("dt"))
                    {
                        string fileName = container.GetDataAttribute("name")?.Value;
                        if (fileName == null)
                        {
                            // no data-name attribute: not a usable file entry
                            continue;
                        }

                        string fileVersion = container.GetDataAttribute("version")?.Value;
                        // get text of next <dd> tag; derived from https://stackoverflow.com/a/25535623/262123
                        // (the node itself may be missing, so the null check belongs on the node)
                        string description = container.SelectSingleNode("following-sibling::*[1][self::dd]//div")?.InnerText.Trim();

                        downloads.Add(
                            new GenericModDownload(fileName, description, fileVersion)
                            );
                    }
                }
            }

            // yield info
            return(new NexusMod
            {
                Name = name,
                Version = parsedVersion?.ToString() ?? version,
                Url = url,
                Downloads = downloads.ToArray()
            });
        }
Пример #50
0
        /// <summary>
        /// Crawls the county rows (or, when the page has none, the town rows) of the given
        /// page and appends one child area per row to <paramref name="parent"/>.
        /// </summary>
        /// <remarks>
        /// On any exception the method sleeps five seconds, resets <c>parent.Children</c>,
        /// logs the failure and tries again. Retries are unbounded, exactly as before, but
        /// they now run in a loop instead of recursing from the <c>catch</c> block, so a
        /// page that keeps failing can no longer grow the call stack until it overflows.
        /// </remarks>
        /// <param name="parent">Area whose <c>Children</c> collection receives the crawled entries.</param>
        /// <param name="url">Page address; it is appended to <c>BaseUrl</c>.</param>
        /// <param name="provinceCode">Province code, passed through unchanged to <c>CrawlingTown</c>.</param>
        private void CrawlingCounty(AreaCrawlingModel parent, string url, string provinceCode)
        {
            while (true)
            {
                try
                {
                    var isTown = false;
                    var html   = GetResponse(BaseUrl + url).Result;
                    var doc    = new HtmlDocument();
                    doc.LoadHtml(html);

                    // Prefer county rows; fall back to town rows when the page has none.
                    var nodeList = doc.DocumentNode.SelectNodes("//tr[@class='countytr']");
                    if (nodeList == null)
                    {
                        nodeList = doc.DocumentNode.SelectNodes("//tr[@class='towntr']");
                        isTown   = true;
                    }

                    if (nodeList == null)
                    {
                        _logger.LogDebug("没有数据");
                        return;
                    }

                    foreach (var node in nodeList)
                    {
                        // Rows normally wrap code and name in links; fall back to the bare cells.
                        var codeNode = node.SelectSingleNode("td[1]/a");
                        var nameNode = node.SelectSingleNode("td[2]/a");
                        if (codeNode == null)
                        {
                            codeNode = node.SelectSingleNode("td[1]");
                            nameNode = node.SelectSingleNode("td[2]");
                        }

                        // Skip incomplete rows and the "市辖区" (municipal district) placeholder row.
                        if (codeNode == null || nameNode == null || nameNode.InnerText == "市辖区")
                        {
                            continue;
                        }

                        var model = new AreaCrawlingModel
                        {
                            Code     = codeNode.InnerText,
                            Name     = nameNode.InnerText,
                            FullName = parent.FullName + nameNode.InnerText
                        };

                        SetPinyin(model);

                        // NOTE(review): the result is not awaited, so this method neither waits for
                        // the coordinate lookup nor observes its exceptions - confirm this is intended.
                        CrawlingCoord(model).ConfigureAwait(false);

                        // Only county rows are followed down to the town level.
                        if (!isTown)
                        {
                            var hrefAttribute = codeNode.Attributes["href"];
                            if (hrefAttribute != null)
                            {
                                CrawlingTown(model, hrefAttribute.Value, provinceCode);
                            }
                        }

                        parent.Children.Add(model);
                    }

                    return;
                }
                catch (Exception ex)
                {
                    // Back off, discard the partial result, then let the loop retry the whole page.
                    Thread.Sleep(5000);
                    parent.Children = new List<AreaCrawlingModel>();
                    _logger.LogError($"爬取{parent.Name}下的区县失败");
                    _logger.LogError(ex.Message);
                }
            }
        }
Пример #51
0
        /// <summary>
        /// Collects the addresses of the images displayed on the web page at the given URL
        /// and returns them as a list.
        /// </summary>
        /// <param name="url">Address of the web page.</param>
        /// <returns>
        /// The distinct image addresses whose extension is listed in <c>imgExtensions</c> and
        /// which contain none of the <c>filter_</c> entries; a thumbnail wrapped in a link that
        /// itself points at an image is replaced by the link target.
        /// </returns>
        public List<string> DoGetImages(string url)
        {
            UnDisplayedBrowser browser = new UnDisplayedBrowser();
            browser.NavigateAndWait(url);
            HtmlDocument page = browser.Document;

            List<string> imageAdresses = new List<string>();

            // Pass 1: gather the images displayed on the page.
            foreach (HtmlElement image in page.GetElementsByTagName("IMG"))
            {
                string source = image.GetAttribute("src");
                if (!imgExtensions.Contains(Path.GetExtension(source)))
                {
                    continue;
                }

                // When filter expressions are configured, drop any address that contains one of them.
                bool excluded = false;
                if (filter_ != null)
                {
                    foreach (var pattern in filter_)
                    {
                        if (source.Contains(pattern))
                        {
                            excluded = true;
                            break;
                        }
                    }
                }

                if (!excluded && !imageAdresses.Contains(source))
                {
                    imageAdresses.Add(source);
                }
            }

            // Pass 2: swap thumbnail images for the image their enclosing link points to.
            foreach (HtmlElement link in page.GetElementsByTagName("A"))
            {
                string target = link.GetAttribute("href");
                if (!imgExtensions.Contains(Path.GetExtension(target)))
                {
                    continue;
                }

                foreach (HtmlElement thumbnail in link.GetElementsByTagName("IMG"))
                {
                    // Remove returns true only when the thumbnail address was in the list.
                    if (imageAdresses.Remove(thumbnail.GetAttribute("src")))
                    {
                        imageAdresses.Add(target);
                    }
                }
            }

            return imageAdresses;
        }
Пример #52
0
        /// <summary>
        /// Downloads one chapter-list page of a story and parses the chapters on it together
        /// with the highest page number found in the pager links.
        /// </summary>
        /// <param name="truyen">Story whose <c>TruyenUrl</c> is used to build the page address.</param>
        /// <param name="page">Page number to load; also stored on every parsed chapter.</param>
        /// <param name="allowGetNoidung">
        /// When true the chapter content is extracted as well, and rows without a content node are skipped.
        /// </param>
        /// <returns>
        /// The parsed data, or <c>null</c> when the request reports failure or no chapter rows match.
        /// </returns>
        public async Task<ChapterLoadData> GetPageChapTer(Truyen truyen, int page, bool allowGetNoidung = true)
        {
            var address  = string.Format(TRUYEN_URL, truyen.TruyenUrl, page);
            var response = await WebUtils.DoRequestSimpleGet(address, null, "", HOME_PAGE);
            if (!response.Status)
            {
                return null;
            }

            var document = new HtmlDocument();
            document.LoadHtml(response.Data);

            var rows = document.DocumentNode.SelectNodes(DANH_SACH_CHUONG_XPATH);
            if (rows == null)
            {
                return null;
            }

            var chapters = new List<Chapter>();

            // The first two matched rows are not chapters and are skipped.
            foreach (var row in rows.Skip(2))
            {
                var chapter = new Chapter
                {
                    TruyenUrl     = truyen.TruyenUrl,
                    PageOfChapter = page
                };

                var numberNode = row.SelectSingleNode(CHUONGNUMBER_XPATH);
                if (numberNode != null)
                {
                    chapter.SoThuTu = numberNode.InnerText.Trim();
                }

                // A row without a title is not a usable chapter.
                var titleNode = row.SelectSingleNode(TENCHUONG_XPATH);
                if (titleNode == null)
                {
                    continue;
                }
                chapter.TenChuong = titleNode.InnerText.Trim();

                var sourceNode = row.SelectSingleNode(SOURCE_XPATH);
                if (sourceNode != null)
                {
                    chapter.Nguon = sourceNode.InnerText.Trim();
                }

                if (allowGetNoidung)
                {
                    var contentNode = row.SelectSingleNode(NOIDUNG_XPATH);
                    if (contentNode == null)
                    {
                        continue;
                    }

                    // Strip direct <span> children before reading the text; the list is
                    // materialised first because the child collection is modified while removing.
                    foreach (var span in contentNode.ChildNodes.Where(child => child.Name == "span").ToList())
                    {
                        contentNode.RemoveChild(span);
                    }
                    chapter.NoiDung = contentNode.InnerText.Trim();
                }

                // Zero-based position among the chapters kept so far.
                chapter.IndexNumberPageOfChapter = chapters.Count;
                chapters.Add(chapter);
            }

            var maxPage    = 1;
            var pagerItems = document.DocumentNode.SelectNodes(MAX_PAGE_TRUYEN_XPATH);
            if (pagerItems != null)
            {
                foreach (var pagerItem in pagerItems)
                {
                    var anchor = pagerItem.SelectSingleNode("./a");
                    if (anchor == null)
                    {
                        continue;
                    }

                    var href        = anchor.GetAttributeValue("href", "");
                    var equalsAt    = href.IndexOf("=");
                    var ampersandAt = href.IndexOf("&");
                    if (equalsAt != -1 && ampersandAt != -1)
                    {
                        try
                        {
                            // Keep only the text between the first '=' and the first '&'.
                            href = href.Substring(equalsAt + 1, ampersandAt - equalsAt - 1);
                        }
                        catch (Exception)
                        {
                            // ignored: href is left unchanged (e.g. '&' precedes '=')
                        }
                    }

                    int pageNumber;
                    int.TryParse(href, out pageNumber);
                    if (pageNumber > maxPage)
                    {
                        maxPage = pageNumber;
                    }
                }
            }

            return new ChapterLoadData
            {
                ListChapter         = chapters,
                MaxPageIndex        = maxPage,
                IndexStartOfChapter = 0
            };
        }
 /// <summary>
 /// Replaces the inner HTML of the template node matched by <c>BlogXPath</c>
 /// with the HTML of the given blog post.
 /// </summary>
 /// <param name="template">Document containing the node to fill.</param>
 /// <param name="blogPost">Post whose <c>HTML</c> becomes the node's content.</param>
 private static void replaceBlogDiv(HtmlDocument template, IBlogPost blogPost)
 {
     template.DocumentNode.SelectSingleNode(BlogXPath).InnerHtml = blogPost.HTML;
 }
Пример #54
0
        /// <summary>
        /// Parses a story-listing page into a <see cref="SiteTruyenData"/>: one
        /// <see cref="Truyen"/> per table row plus the highest page number found in the pager.
        /// </summary>
        /// <param name="htmlData">Raw HTML of the listing page.</param>
        /// <returns>
        /// The parsed data. When no rows match <c>DANHSACHTOPTRUYEN_XPATH</c>, the returned
        /// object is left exactly as its constructor created it.
        /// </returns>
        private static SiteTruyenData ReadSiteTruyenHtmlData(string htmlData)
        {
            var siteTruyen = new SiteTruyenData();
            var dom        = new HtmlDocument();

            dom.LoadHtml(htmlData);
            var tdom = dom.DocumentNode.SelectNodes(DANHSACHTOPTRUYEN_XPATH);
            if (tdom == null)
            {
                return siteTruyen;
            }

            var i          = 0;
            var listTruyen = new List<Truyen>();
            foreach (var idom in tdom)
            {
                i++;
                // Skip the first two matched nodes.
                if (i < 3)
                {
                    continue;
                }

                var truyen = new Truyen(SiteTruyen.LuongSonBac);
                try
                {
                    // Column 2: category (required).
                    var cdom = idom.SelectSingleNode("./td[2]");
                    if (cdom == null)
                    {
                        continue;
                    }
                    truyen.Category = WebUtility.HtmlDecode(cdom.InnerText.Trim());

                    // Column 3: title.
                    cdom = idom.SelectSingleNode("./td[3]");
                    if (cdom != null)
                    {
                        truyen.Title = cdom.InnerText.Trim();
                    }

                    // Column 4: author.
                    cdom = idom.SelectSingleNode("./td[4]");
                    if (cdom != null)
                    {
                        truyen.Author = cdom.InnerText.Trim();
                    }

                    // Column 6: chapter count.
                    cdom = idom.SelectSingleNode("./td[6]");
                    if (cdom != null)
                    {
                        truyen.NumberChaper = int.Parse(cdom.InnerText.Trim());
                    }

                    // Column 7: view count; '.' characters are stripped before parsing.
                    cdom = idom.SelectSingleNode("./td[7]");
                    if (cdom != null)
                    {
                        truyen.NumberView = int.Parse(cdom.InnerText.Trim().Replace(".", ""));
                    }

                    // First link in column 3: story address (required).
                    cdom = idom.SelectSingleNode("./td[3]/a[1]");
                    if (cdom == null)
                    {
                        continue;
                    }
                    truyen.TruyenUrl = cdom.GetAttributeValue("href", "");
                }
                catch (Exception)
                {
                    // Any failure while reading a row (e.g. a non-numeric count) drops that row.
                    continue;
                }
                listTruyen.Add(truyen);
            }

            // Find the number of pages: the largest number that follows the last '-' of a pager link.
            tdom = dom.DocumentNode.SelectNodes(MAXPAGEINDEX_XPATH);
            var maxPage = 1;
            if (tdom != null)
            {
                foreach (var idom in tdom)
                {
                    var href     = idom.GetAttributeValue("href", "");
                    var lastDash = href.LastIndexOf('-');
                    if (lastDash == -1)
                    {
                        continue;
                    }

                    // The previous code passed (Length - lastDash) as the backward-search start
                    // index, which either threw ArgumentOutOfRangeException or produced text
                    // beginning with '-', so no page number above 1 was ever recognised.
                    int n;
                    int.TryParse(href.Substring(lastDash + 1), out n);
                    if (n > maxPage)
                    {
                        maxPage = n;
                    }
                }
            }

            siteTruyen.ListTruyen   = listTruyen;
            siteTruyen.MaxPageIndex = maxPage;
            return siteTruyen;
        }
        /// <summary>
        /// Scrapes search-result pages one after another, starting at <c>StartPageNum</c>,
        /// and turns every product card into an <see cref="Item"/>.
        /// </summary>
        /// <param name="searchText">Search phrase; spaces are replaced with <c>%20</c> in the URL.</param>
        /// <param name="itemsCount">Scraping stops as soon as this many items have been collected.</param>
        /// <param name="minPrice">Lower price bound; the price filter is applied when either bound is non-zero.</param>
        /// <param name="maxPrice">Upper price bound; the price filter is applied when either bound is non-zero.</param>
        /// <returns>
        /// The collected items, or <c>null</c> when reading the page nodes raises a
        /// <see cref="NullReferenceException"/>.
        /// </returns>
        public override async Task<List<Item>> StartScraping(string searchText, int itemsCount = 0, int minPrice = 0, int maxPrice = 0)
        {
            int        collected = 0;
            List<Item> results   = new List<Item>();

            while (true)
            {
                // Build the address of the current page, with the price filter spliced in when requested.
                string pageUrl;
                if (minPrice == 0 && maxPrice == 0)
                {
                    pageUrl = SimpleUrl + searchText.Replace(" ", "%20") + PageChanger + StartPageNum;
                }
                else
                {
                    string withMin     = FilterChanger.Insert(FilterChanger.IndexOf(";"), minPrice.ToString());
                    string priceFilter = withMin.Insert(withMin.IndexOf("&"), maxPrice.ToString());
                    string plainUrl    = SimpleUrl + searchText.Replace(" ", "%20");
                    pageUrl = plainUrl.Insert(plainUrl.IndexOf("search="), priceFilter) + PageChanger + StartPageNum;
                }

                HtmlDocument page = await GetHtmlDocument(pageUrl);

                List<HtmlNode> containers;
                try
                {
                    containers = page.DocumentNode.Descendants("div")
                                 .Where(div => div.GetAttributeValue("class", "").Contains("catalog_main_table j-products-container"))
                                 .ToList();
                }
                catch (NullReferenceException)
                {
                    Console.WriteLine("Your search did not match any postings.");
                    return null;
                }

                // Flatten every product card found inside the result containers.
                List<HtmlNode> cards = containers
                                       .SelectMany(container => container.Descendants("div")
                                                   .Where(div => div.GetAttributeValue("class", "").Contains("j-card-item")))
                                       .ToList();

                Console.WriteLine($"LENGTH IS: {cards.Count}");

                foreach (HtmlNode card in cards)
                {
                    Console.WriteLine($"Item: {collected}");
                    if (collected >= itemsCount)
                    {
                        return results;
                    }

                    // Missing pieces are represented by "\n".
                    HtmlNode nameTag = card.Descendants("span")
                                       .FirstOrDefault(span => span.GetAttributeValue("class", "").Equals("goods-name"));
                    string name = nameTag != null ? nameTag.InnerText : "\n";

                    // The price lives in an <ins> when present, otherwise in a <span>.
                    HtmlNode priceTag = card.Descendants("ins")
                                        .FirstOrDefault(ins => ins.GetAttributeValue("class", "").Equals("lower-price"))
                                        ?? card.Descendants("span")
                                        .FirstOrDefault(span => span.GetAttributeValue("class", "").Equals("lower-price"));
                    string price = priceTag != null ? priceTag.InnerText : "\n";

                    HtmlNode linkTag = card.Descendants("a")
                                       .FirstOrDefault(anchor => anchor.GetAttributeValue("class", "").Contains("ref_goods_n_p"));
                    string realLink = linkTag != null ? linkTag.GetAttributeValue("href", "") : "\n";

                    // Prefer the "data-original" address over "src" for the thumbnail.
                    HtmlNode imgTag = card.Descendants("img")
                                      .FirstOrDefault(img => img.GetAttributeValue("class", "").Equals("thumbnail"));
                    string imgLink = "\n";
                    if (imgTag != null)
                    {
                        HtmlAttributeCollection attributes = imgTag.Attributes;
                        if (attributes.Contains("data-original"))
                        {
                            imgLink = imgTag.GetAttributeValue("data-original", "");
                        }
                        else if (attributes.Contains("src"))
                        {
                            imgLink = imgTag.GetAttributeValue("src", "");
                        }
                    }

                    results.Add(new Item(realLink, imgLink, name, price, SiteName));

                    Console.WriteLine();
                    collected++;
                }

                // Move on only when the pager contains a link labelled with the next page number.
                HtmlNode pager = page.DocumentNode.Descendants("div")
                                 .FirstOrDefault(div => div.GetAttributeValue("class", "").Equals("pageToInsert"));
                if (pager == null)
                {
                    return results;
                }

                bool hasNextPage = pager.Descendants("a")
                                   .Any(anchor => anchor.InnerText == (StartPageNum + 1).ToString());
                if (!hasNextPage)
                {
                    return results;
                }

                StartPageNum++;
            }
        }
Пример #56
0
        /// <summary>
        /// Walks every listing page in <c>docUrls</c>, downloads the council PDF files linked
        /// from it that are not yet known, extracts their text and query results, and saves
        /// the accumulated documents and queries after each listing page.
        /// </summary>
        /// <remarks>
        /// The meeting date is parsed from the PDF file name; files dated before
        /// <c>dtStartFrom</c> are skipped. A failed download is reported on the console and
        /// processing continues with the document record still added, as before.
        /// </remarks>
        public void DownloadCouncilPdfFiles()
        {
            List<Documents>   docs    = this.LoadDocumentsDoneSQL();
            List<QueryResult> queries = this.LoadQueriesDoneSQL();
            HtmlWeb           web     = new HtmlWeb();

            // WebClient is disposable; release it once all listing pages are processed.
            using (WebClient c = new WebClient())
            {
                foreach (string url in this.docUrls)
                {
                    HtmlDocument       listDoc     = web.Load(url);
                    HtmlNodeCollection docNodeList = listDoc.DocumentNode.SelectNodes("//div[contains(@id,'vid')]//ul/li/a[@href]");

                    if (docNodeList != null)
                    {
                        Console.WriteLine("{0} files...", docNodeList.Count);

                        foreach (HtmlNode docNode in docNodeList)
                        {
                            // Trim tabs, line breaks, spaces (32) and non-breaking spaces (160).
                            string pdfName = docNode.InnerText.Trim('\t', '\r', '\n', (char)32, (char)160);
                            string tag     = pdfName.ToLower().Contains("agenda") ? "agenda" : "minute";
                            string pdfUrl  = docNode.Attributes["href"].Value;
                            pdfUrl = pdfUrl.StartsWith("http") ? pdfUrl : "http://www.saginaw-mi.com" + pdfUrl;

                            // Last path segment of the address, without the query string.
                            string   fileName    = pdfUrl.Split('?').FirstOrDefault().Split('/').LastOrDefault();
                            DateTime meetingDate = DateTime.Parse(fileName.Replace(".pdf", string.Empty));

                            if (meetingDate < this.dtStartFrom)
                            {
                                Console.WriteLine("{0} earlier than {1}. Skip...", meetingDate, dtStartFrom);
                                continue;
                            }

                            Documents localdoc = docs.FirstOrDefault(t => t.DocSource == pdfUrl);

                            if (localdoc == null)
                            {
                                string category = "Council";
                                localdoc           = new Documents();
                                localdoc.DocId     = Guid.NewGuid().ToString();
                                localdoc.DocType   = category;
                                localdoc.CityId    = this.cityEntity.CityId;
                                localdoc.DocSource = pdfUrl;

                                string localPath = string.Format("{0}\\{1}_{2}", this.localDirectory, tag, fileName);
                                localPath             = HttpUtility.UrlDecode(localPath);
                                localdoc.DocLocalPath = localPath;

                                try
                                {
                                    c.DownloadFile(pdfUrl, localPath);
                                }
                                catch (Exception ex)
                                {
                                    // Best effort: keep going, but report the failure instead of hiding it.
                                    Console.WriteLine("Failed to download {0}: {1}", pdfUrl, ex.Message);
                                }

                                docs.Add(localdoc);
                            }
                            else
                            {
                                Console.ForegroundColor = ConsoleColor.Yellow;
                                Console.WriteLine("This document already downloaded...");
                                Console.ResetColor();
                            }

                            this.ReadText(false, localdoc.DocLocalPath, ref localdoc);
                            QueryResult qr = queries.FirstOrDefault(t => t.DocId == localdoc.DocId);

                            if (qr == null)
                            {
                                qr             = new QueryResult();
                                qr.DocId       = localdoc.DocId;
                                qr.CityId      = localdoc.CityId;
                                qr.MeetingDate = meetingDate;
                                qr.SearchTime  = DateTime.Now;
                                queries.Add(qr);
                            }

                            this.ExtractQueriesFromDoc(localdoc, ref qr);
                            Console.WriteLine("{0} documents saved...", docs.Count);
                            Console.WriteLine("{0} query results saved...", queries.Count);
                        }
                    }

                    this.SaveMeetingResultsToSQL(docs, queries);
                }
            }
        }
Пример #57
0
        private async Task CheckPage(HtmlDocument htmlDocument)
        {
            if (htmlDocument == null)
            {
                Bot.ArchiLogger.LogNullError(nameof(htmlDocument));
                return;
            }

            HtmlNodeCollection htmlNodes = htmlDocument.DocumentNode.SelectNodes("//div[@class='badge_row_inner']");

            if (htmlNodes == null)
            {
                // No eligible badges whatsoever
                return;
            }

            List <Task> backgroundTasks = null;

            foreach (HtmlNode htmlNode in htmlNodes)
            {
                HtmlNode statsNode = htmlNode.SelectSingleNode(".//div[@class='badge_title_stats_content']");

                HtmlNode appIDNode = statsNode?.SelectSingleNode(".//div[@class='card_drop_info_dialog']");
                if (appIDNode == null)
                {
                    // It's just a badge, nothing more
                    continue;
                }

                string appIDText = appIDNode.GetAttributeValue("id", null);
                if (string.IsNullOrEmpty(appIDText))
                {
                    Bot.ArchiLogger.LogNullError(nameof(appIDText));
                    continue;
                }

                string[] appIDSplitted = appIDText.Split('_');
                if (appIDSplitted.Length < 5)
                {
                    Bot.ArchiLogger.LogNullError(nameof(appIDSplitted));
                    continue;
                }

                appIDText = appIDSplitted[4];

                if (!uint.TryParse(appIDText, out uint appID) || (appID == 0))
                {
                    Bot.ArchiLogger.LogNullError(nameof(appID));
                    continue;
                }

                if (SalesBlacklist.Contains(appID) || Program.GlobalConfig.Blacklist.Contains(appID) || Bot.IsBlacklistedFromIdling(appID) || (Bot.BotConfig.IdlePriorityQueueOnly && !Bot.IsPriorityIdling(appID)))
                {
                    // We're configured to ignore this appID, so skip it
                    continue;
                }

                if (IgnoredAppIDs.TryGetValue(appID, out DateTime ignoredUntil))
                {
                    if (ignoredUntil < DateTime.UtcNow)
                    {
                        // This game served its time as being ignored
                        IgnoredAppIDs.TryRemove(appID, out _);
                    }
                    else
                    {
                        // This game is still ignored
                        continue;
                    }
                }

                // Cards
                HtmlNode progressNode = statsNode.SelectSingleNode(".//span[@class='progress_info_bold']");
                if (progressNode == null)
                {
                    Bot.ArchiLogger.LogNullError(nameof(progressNode));
                    continue;
                }

                string progressText = progressNode.InnerText;
                if (string.IsNullOrEmpty(progressText))
                {
                    Bot.ArchiLogger.LogNullError(nameof(progressText));
                    continue;
                }

                ushort cardsRemaining = 0;
                Match  progressMatch  = Regex.Match(progressText, @"\d+");

                // This might fail if we have no card drops remaining, 0 is not printed in this case - that's fine
                if (progressMatch.Success)
                {
                    if (!ushort.TryParse(progressMatch.Value, out cardsRemaining) || (cardsRemaining == 0))
                    {
                        Bot.ArchiLogger.LogNullError(nameof(cardsRemaining));
                        continue;
                    }
                }

                if (cardsRemaining == 0)
                {
                    // Normally we'd trust this information and simply skip the rest
                    // However, Steam is so f****d up that we can't simply assume that it's correct
                    // It's entirely possible that actual game page has different info, and badge page lied to us
                    // We can't check every single game though, as this will literally kill people with cards from games they don't own
                    // Luckily for us, it seems to happen only with some specific games
                    if (!UntrustedAppIDs.Contains(appID))
                    {
                        continue;
                    }

                    // To save us on extra work, check cards earned so far first
                    HtmlNode cardsEarnedNode = statsNode.SelectSingleNode(".//div[@class='card_drop_info_header']");
                    if (cardsEarnedNode == null)
                    {
                        Bot.ArchiLogger.LogNullError(nameof(cardsEarnedNode));
                        continue;
                    }

                    string cardsEarnedText = cardsEarnedNode.InnerText;
                    if (string.IsNullOrEmpty(cardsEarnedText))
                    {
                        Bot.ArchiLogger.LogNullError(nameof(cardsEarnedText));
                        continue;
                    }

                    Match cardsEarnedMatch = Regex.Match(cardsEarnedText, @"\d+");
                    if (!cardsEarnedMatch.Success)
                    {
                        Bot.ArchiLogger.LogNullError(nameof(cardsEarnedMatch));
                        continue;
                    }

                    if (!ushort.TryParse(cardsEarnedMatch.Value, out ushort cardsEarned))
                    {
                        Bot.ArchiLogger.LogNullError(nameof(cardsEarned));
                        continue;
                    }

                    if (cardsEarned > 0)
                    {
                        // If we already earned some cards for this game, it's very likely that it's done
                        // Let's hope that trusting cardsRemaining AND cardsEarned is enough
                        // If I ever hear that it's not, I'll most likely need a doctor
                        continue;
                    }

                    // If we have no cardsRemaining and no cardsEarned, it's either:
                    // - A game we don't own physically, but we have cards from it in inventory
                    // - F2P game that we didn't spend any money in, but we have cards from it in inventory
                    // - Steam issue
                    // As you can guess, we must follow the rest of the logic in case of Steam issue
                }

                // Hours
                HtmlNode timeNode = statsNode.SelectSingleNode(".//div[@class='badge_title_stats_playtime']");
                if (timeNode == null)
                {
                    Bot.ArchiLogger.LogNullError(nameof(timeNode));
                    continue;
                }

                string hoursText = timeNode.InnerText;
                if (string.IsNullOrEmpty(hoursText))
                {
                    Bot.ArchiLogger.LogNullError(nameof(hoursText));
                    continue;
                }

                float hours      = 0.0F;
                Match hoursMatch = Regex.Match(hoursText, @"[0-9\.,]+");

                // This might fail if we have exactly 0.0 hours played, as it's not printed in that case - that's fine
                if (hoursMatch.Success)
                {
                    if (!float.TryParse(hoursMatch.Value, NumberStyles.Number, CultureInfo.InvariantCulture, out hours) || (hours <= 0.0F))
                    {
                        Bot.ArchiLogger.LogNullError(nameof(hours));
                        continue;
                    }
                }

                // Names
                HtmlNode nameNode = statsNode.SelectSingleNode("(.//div[@class='card_drop_info_body'])[last()]");
                if (nameNode == null)
                {
                    Bot.ArchiLogger.LogNullError(nameof(nameNode));
                    continue;
                }

                string name = nameNode.InnerText;
                if (string.IsNullOrEmpty(name))
                {
                    Bot.ArchiLogger.LogNullError(nameof(name));
                    continue;
                }

                // We handle two cases here - normal one, and no card drops remaining
                int nameStartIndex = name.IndexOf(" by playing ", StringComparison.Ordinal);
                if (nameStartIndex <= 0)
                {
                    nameStartIndex = name.IndexOf("You don't have any more drops remaining for ", StringComparison.Ordinal);
                    if (nameStartIndex <= 0)
                    {
                        Bot.ArchiLogger.LogNullError(nameof(nameStartIndex));
                        continue;
                    }

                    nameStartIndex += 32;                     // + 12 below
                }

                nameStartIndex += 12;

                int nameEndIndex = name.LastIndexOf('.');
                if (nameEndIndex <= nameStartIndex)
                {
                    Bot.ArchiLogger.LogNullError(nameof(nameEndIndex));
                    continue;
                }

                name = WebUtility.HtmlDecode(name.Substring(nameStartIndex, nameEndIndex - nameStartIndex));

                // Levels
                byte badgeLevel = 0;

                HtmlNode levelNode = htmlNode.SelectSingleNode(".//div[@class='badge_info_description']/div[2]");
                if (levelNode != null)
                {
                    // There is no levelNode if we didn't craft that badge yet (level 0)
                    string levelText = levelNode.InnerText;
                    if (string.IsNullOrEmpty(levelText))
                    {
                        Bot.ArchiLogger.LogNullError(nameof(levelText));
                        continue;
                    }

                    int levelIndex = levelText.IndexOf("Level ", StringComparison.OrdinalIgnoreCase);
                    if (levelIndex < 0)
                    {
                        Bot.ArchiLogger.LogNullError(nameof(levelIndex));
                        continue;
                    }

                    levelIndex += 6;
                    if (levelText.Length <= levelIndex)
                    {
                        Bot.ArchiLogger.LogNullError(nameof(levelIndex));
                        continue;
                    }

                    levelText = levelText.Substring(levelIndex, 1);
                    if (!byte.TryParse(levelText, out badgeLevel) || (badgeLevel == 0) || (badgeLevel > 5))
                    {
                        Bot.ArchiLogger.LogNullError(nameof(badgeLevel));
                        continue;
                    }
                }

                // Done with parsing, we have two possible cases here
                // Either we have decent info about appID, name, hours, cardsRemaining (cardsRemaining > 0) and level
                // OR we strongly believe that Steam lied to us, in this case we will need to check game individually (cardsRemaining == 0)
                if (cardsRemaining > 0)
                {
                    GamesToFarm.Add(new Game(appID, name, hours, cardsRemaining, badgeLevel));
                }
                else
                {
                    Task task = CheckGame(appID, name, hours, badgeLevel);

                    switch (Program.GlobalConfig.OptimizationMode)
                    {
                    case GlobalConfig.EOptimizationMode.MinMemoryUsage:
                        await task.ConfigureAwait(false);

                        break;

                    default:
                        if (backgroundTasks == null)
                        {
                            backgroundTasks = new List <Task>();
                        }

                        backgroundTasks.Add(task);
                        break;
                    }
                }
            }

            // If we have any background tasks, wait for them
            if ((backgroundTasks != null) && (backgroundTasks.Count > 0))
            {
                await Task.WhenAll(backgroundTasks).ConfigureAwait(false);
            }
        }
Пример #58
0
        /// <summary>
        ///     Fetches the first badge page, works out how many badge pages exist, checks every page
        ///     and reports whether <c>GamesToFarm</c> contains any entries afterwards.
        /// </summary>
        /// <returns>
        ///     <c>null</c> if the first badge page could not be fetched or the last page number could not be parsed;
        ///     <c>false</c> if <c>GamesToFarm</c> is empty once all pages were checked;
        ///     <c>true</c> otherwise, after <c>SortGamesToFarm</c> has completed.
        /// </returns>
        private async Task<bool?> IsAnythingToFarm()
        {
            // Find the number of badge pages
            Bot.ArchiLogger.LogGenericInfo(Strings.CheckingFirstBadgePage);
            HtmlDocument htmlDocument = await Bot.ArchiWebHandler.GetBadgePage(1).ConfigureAwait(false);

            if (htmlDocument == null)
            {
                Bot.ArchiLogger.LogGenericWarning(Strings.WarningCouldNotCheckBadges);
                return null;
            }

            // A missing pagination link leaves us with the single page we already have
            byte maxPages = 1;

            HtmlNode htmlNode = htmlDocument.DocumentNode.SelectSingleNode("(//a[@class='pagelink'])[last()]");

            if (htmlNode != null)
            {
                string lastPage = htmlNode.InnerText;
                if (string.IsNullOrEmpty(lastPage))
                {
                    Bot.ArchiLogger.LogNullError(nameof(lastPage));
                    return null;
                }

                if (!byte.TryParse(lastPage, out maxPages) || (maxPages == 0))
                {
                    Bot.ArchiLogger.LogNullError(nameof(maxPages));
                    return null;
                }
            }

            GamesToFarm.Clear();

            // The first page is already downloaded, so it is checked from the document directly
            Task mainTask = CheckPage(htmlDocument);

            switch (Program.GlobalConfig.OptimizationMode)
            {
            case GlobalConfig.EOptimizationMode.MinMemoryUsage:
                // Check pages strictly one after another
                await mainTask.ConfigureAwait(false);

                if (maxPages > 1)
                {
                    Bot.ArchiLogger.LogGenericInfo(Strings.CheckingOtherBadgePages);

                    // The counter is an int on purpose: a byte counter wraps around to 0 after 255 (or throws in
                    // a checked context), so "page <= maxPages" could never become false for maxPages == 255
                    for (int page = 2; page <= maxPages; page++)
                    {
                        await CheckPage((byte) page).ConfigureAwait(false);
                    }
                }

                break;

            default:
                // Start all page checks first and await them together
                List<Task> tasks = new List<Task>(maxPages)
                {
                    mainTask
                };

                if (maxPages > 1)
                {
                    Bot.ArchiLogger.LogGenericInfo(Strings.CheckingOtherBadgePages);

                    // int counter for the same overflow reason as above; the page number is passed by value
                    // to a direct call, so no per-iteration copy of the loop variable is needed
                    for (int page = 2; page <= maxPages; page++)
                    {
                        tasks.Add(CheckPage((byte) page));
                    }
                }

                await Task.WhenAll(tasks).ConfigureAwait(false);

                break;
            }

            if (GamesToFarm.Count == 0)
            {
                ShouldResumeFarming = false;
                return false;
            }

            ShouldResumeFarming = true;
            await SortGamesToFarm().ConfigureAwait(false);

            return true;
        }
Пример #59
0
        /// <summary>
        ///     Gets the lessons of the specified teacher.
        /// </summary>
        /// <param name="siteTeacherId">ID of the teacher on the site</param>
        /// <returns>The lessons of the specified teacher</returns>
        /// <exception cref="FlurlHttpException">Thrown if the site did not return a successful HTTP code</exception>
        /// <exception cref="NullReferenceException">Thrown if no timetable blocks were found on the page</exception>
        public async Task<IEnumerable<Lesson>> GetLessons(int siteTeacherId)
        {
            // Date/time layout used when combining the day header with a lesson's start/end time
            const string dateTimeFormat = "dd.MM.yyyy HH:mm";

            var response = await _client
                           .SetQueryParam("timetable")
                           .SetQueryParam("lecturer", siteTeacherId)
                           .GetAsync();

            var doc = new HtmlDocument();

            // The document is parsed inside the using block, so the response stream is released afterwards
            using (var stream = await response.Content.ReadAsStreamAsync())
            {
                doc.Load(stream);
            }

            var timetableNode = doc.DocumentNode.SelectNodes("//div[contains(@class, 'timetable_sheet')]");

            if (timetableNode is null)
            {
                // NOTE(review): the exception type is kept as-is because callers may already catch it;
                // a more specific exception type would be preferable
                throw new NullReferenceException("Can not find lessons on the page");
            }

            var teacher = doc.DocumentNode.SelectSingleNode("//a[@class='navbar-brand']/span[2]")
                          .GetNormalizedInnerText();

            var lessons = new List<Lesson>();

            foreach (var lessonNode in timetableNode)
            {
                if (lessonNode.ChildNodes.Count <= 3) // an empty lesson frame
                {
                    continue;
                }

                // The day header text is split at the first comma; the part after it is used as the date
                var date = lessonNode.ParentNode.SelectSingleNode(".//div[contains(@class,'dayofweek')]")
                           .GetNormalizedInnerText()
                           .Split(new[] { ',' }, 2)[1]
                           .Trim();

                // First part is used as the auditory, second part as the address
                var adr = lessonNode.SelectSingleNode(".//span[contains(@class,'auditorium')]")
                          .GetNormalizedInnerText()
                          .Split(new[] { ',' }, 2)
                          .Select(x => x.Trim())
                          .ToArray();

                // Start and end time, separated by an en dash
                var time = lessonNode.SelectSingleNode(".//span[contains(@class,'time_para')]")
                           .GetNormalizedInnerText()
                           .Split(new[] { '–' }, 2);

                var groups = lessonNode.SelectSingleNode(".//span[contains(@class,'group')]").GetNormalizedInnerText();

                // Site data is machine-formatted, so it is parsed with the invariant culture rather than
                // whatever culture the current thread happens to run under
                var number = byte.Parse(lessonNode.SelectSingleNode(".//span[contains(@class,'num_para')]")
                                        .GetNormalizedInnerText(),
                                        CultureInfo.InvariantCulture);
                var lessonName = lessonNode.SelectSingleNode(".//span[contains(@class,'discipline')]").GetNormalizedInnerText();
                var lessonType = lessonNode.SelectSingleNode(".//span[contains(@class,'kindOfWork')]").GetNormalizedInnerText();

                lessons.Add(new Lesson
                {
                    Address   = adr[1],
                    Auditory  = adr[0],
                    Number    = number,
                    Groups    = groups,
                    Name      = lessonName,
                    Type      = lessonType,
                    Teacher   = teacher,
                    // Invariant culture: the ':' in the format is the culture's time separator and the year is
                    // interpreted in the culture's calendar, so a null provider would make parsing machine-dependent
                    StartTime = DateTime.ParseExact($"{date} {time[0]}", dateTimeFormat, CultureInfo.InvariantCulture,
                                                    DateTimeStyles.AssumeLocal),
                    EndTime = DateTime.ParseExact($"{date} {time[1]}", dateTimeFormat, CultureInfo.InvariantCulture,
                                                  DateTimeStyles.AssumeLocal)
                });
            }

            return lessons;
        }
Пример #60
0
 /// <summary>
 ///     Returns the HTML-decoded inner text of the anchor found by a fixed XPath
 ///     below the element whose id is "page_content".
 /// </summary>
 /// <param name="htmlDoc">Parsed document to read the label from</param>
 /// <returns>The decoded inner text of the matched anchor</returns>
 private static string GetLabel(HtmlDocument htmlDoc)
 {
     const string labelXPath = "//*[@id=\"page_content\"]/div[1]/div[3]/div[2]/a";

     // No null check here: if the XPath matches nothing, the member access below throws
     var labelNode = htmlDoc.DocumentNode.SelectSingleNode(labelXPath);
     var rawLabel  = labelNode.InnerText.ToString();

     return WebUtility.HtmlDecode(rawLabel);
 }