/// <summary>
        /// Extracts the price of the requested material from the parsed site document.
        /// </summary>
        /// <param name="type">
        /// Material whose price is wanted. It selects which match of
        /// <c>ApmexConst.APMEX_LiST_QUERY</c> is read: Gold = 0, Silver = 1, Platinum = 2, Palladium = 3.
        /// </param>
        /// <param name="siteInfo">Parsed document the selector is evaluated against.</param>
        /// <returns>
        /// The parsed price, or 0 when the material is not one of the four above, the matching
        /// element (or its first child) is missing, or the text is not a number.
        /// </returns>
        private decimal GetPriceFromInfo(MaterialTypes type, CsQuery.CQ siteInfo)
        {
            var data = siteInfo[ApmexConst.APMEX_LiST_QUERY].ToList();

            int index;

            switch (type)
            {
            case MaterialTypes.Gold:
                index = 0;
                break;

            case MaterialTypes.Silver:
                index = 1;
                break;

            case MaterialTypes.Platinum:
                index = 2;
                break;

            case MaterialTypes.Palladium:
                index = 3;
                break;

            default:
                // Unknown material: same result as before (nothing to parse, so 0).
                return(0m);
            }

            // The page may contain fewer matches than expected; report "no price" instead of throwing.
            if (index >= data.Count)
            {
                return(0m);
            }

            string dollarStr = data[index].FirstChild?.ToString();
            if (string.IsNullOrWhiteSpace(dollarStr))
            {
                return(0m);
            }

            // Convert to decimal. The invariant culture is used so that "1,234.56" is read the
            // same way regardless of the machine's regional settings.
            decimal result;
            decimal.TryParse(
                dollarStr.Replace("$", ""),
                System.Globalization.NumberStyles.Number,
                System.Globalization.CultureInfo.InvariantCulture,
                out result);

            return(result);
        }
Example #2
0
        /// <summary>
        /// Reads the HTML file at <c>FileName</c> and turns every anchor that has an
        /// <c>href</c> attribute into a <c>Bookmark</c>.
        /// </summary>
        /// <returns>The bookmarks found, in document order.</returns>
        private List <Bookmark> ImportFromHtml()
        {
            // CsQuery was chosen for HTML parsing because the other options did not quite fit:
            // AngleSharp requires a newer .NET Framework version, and HtmlAgilityPack has bugs and is no longer maintained.
            // https://habr.com/en/post/273807/#AngleSharp
            // https://ru.stackoverflow.com/questions/420354/%D0%9A%D0%B0%D0%BA-%D1%80%D0%B0%D1%81%D0%BF%D0%B0%D1%80%D1%81%D0%B8%D1%82%D1%8C-html-%D0%B2-net

            List <Bookmark> result = new List <Bookmark>();

            CsQuery.CQ document = CsQuery.CQ.Create(File.ReadAllText(FileName));
            foreach (CsQuery.IDomObject anchor in document.Find("a"))
            {
                // Anchors without an href carry no link and are skipped.
                if (!anchor.HasAttribute("href"))
                {
                    continue;
                }

                result.Add(new Bookmark
                {
                    URL = anchor.GetAttribute("href"),
                    // Non-English text comes out as character codes, so it has to be decoded.
                    Name = System.Net.WebUtility.HtmlDecode(anchor.InnerText)
                });
            }

            return(result);
        }
Example #3
0
        /// <summary>
        /// Requests <paramref name="addr"/> with the stored cookies, parses the returned HTML and
        /// writes to the console either the profile name and page title or "ERROR LOGIN".
        /// </summary>
        /// <param name="addr">Address handed to <c>CreateWebRequest</c>.</param>
        /// <returns>
        /// Always <c>true</c>; a missing profile block is only reported on the console.
        /// NOTE(review): callers may expect <c>false</c> on a failed login — confirm before changing.
        /// </returns>
        public bool VisitTo(string addr)
        {
            HttpWebRequest clientVisit = CreateWebRequest(addr, Method.GET);

            clientVisit.CookieContainer.Add(Cookies); // add cookies to container

            // The response is wrapped in using so it is released even when reading or parsing
            // throws; previously Close() was only reached on the success path.
            using (WebResponse requestVisit = clientVisit.GetResponse())
            using (Stream streamGetData = requestVisit.GetResponseStream())
            using (StreamReader reader = new StreamReader(streamGetData))
            {
                CsQuery.CQ DOM = CsQuery.CQ.Create(reader); // parse html

                // find profile block
                CsQuery.IDomObject profile = DOM.Find("div").FirstOrDefault(e => e.ClassName == "logininfo");

                // find name
                CsQuery.IDomElement profileName = profile?.ChildElements.FirstOrDefault(e => e.Attributes["title"] == "Просмотр профиля");

                if (profileName != null)
                {
                    Console.WriteLine($"User: { profileName.FirstChild }");
                    Console.WriteLine($"Visit: `{ DOM.Find("title").Text() }`"); // title of the visited course page
                }
                else
                {
                    Console.WriteLine("ERROR LOGIN");
                }
            }

            return(true);
        }
Example #4
0
        //将正文中没有 http:// 开头的img路径替换
        //public static string GetHtml(string sHtmlText)
        //{
        //    //string resultHtml = string.Empty;
        //    // 定义正则表达式用来匹配 img 标签
        //    Regex regImg = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

        //    // 搜索匹配的字符串
        //    MatchCollection matches = regImg.Matches(sHtmlText);
        //    //int i = 0;
        //    string[] sUrlList = new string[matches.Count];
        //    foreach (Match match in matches)
        //    {
        //        string img = match.Groups["imgUrl"].Value;
        //        //sUrlList[i++] = match.Groups["imgUrl"].Value;
        //        //foreach (var img in sUrlList)
        //        //{
        //        if (!img.StartsWith("data:image/"))
        //        {
        //            if (!img.Contains("this.src"))
        //            {
        //                if (!img.Contains("http://"))
        //                {
        //                    sHtmlText = Regex.Replace(sHtmlText, img, "http://blog2.cnool.net" + img);
        //                }
        //            }
        //        }
        //        //}
        //    }
        //    return sHtmlText;
        //}

        /// <summary>
        /// Prefixes the <c>src</c> of every <c>img</c> element that is not already absolute with
        /// <c>http://blog2.cnool.net</c> and returns the rewritten markup.
        /// </summary>
        /// <param name="sHtmlText">HTML whose image paths should be rewritten.</param>
        /// <returns>
        /// The input text with the affected paths replaced. Sources that are empty, contain
        /// <c>file://</c>, or start with <c>data:image/</c>, <c>http://</c> or <c>https://</c>
        /// are left untouched.
        /// </returns>
        public static string GetHtml(string sHtmlText)
        {
            const string host = "http://blog2.cnool.net";

            CsQuery.CQ cq = sHtmlText;

            // Collect each distinct source once. An empty source must be skipped: used as a
            // pattern it would match at every position of the text.
            var sources = cq["img"]
                          .Select(img => img.GetAttribute("src"))
                          .Where(src => !string.IsNullOrWhiteSpace(src))
                          .Where(src => !src.Contains("file://")
                                 && !src.StartsWith("data:image/", StringComparison.Ordinal)
                                 && !src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                 && !src.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
                          .Distinct()
                          .ToList();

            foreach (var src in sources)
            {
                // Escape the source so characters such as '?', '.', '+' or '(' are matched literally.
                // The look-behind only accepts the path where an attribute value can start (after
                // whitespace, a quote or '='), so a path that was already prefixed, or that is the
                // tail of a longer URL, is not prefixed again.
                var pattern = @"(?<=[\s""'=])" + Regex.Escape(src);

                // A MatchEvaluator is used so '$' in the path is not treated as a substitution token.
                sHtmlText = Regex.Replace(sHtmlText, pattern, match => host + match.Value);
            }

            return(sHtmlText);
        }
Example #5
0
            /// <summary>
            /// Returns the inner text of the "p" descendants of <paramref name="node"/> when there
            /// are any, otherwise of <paramref name="node"/> itself.
            /// </summary>
            /// <param name="node">Selection to read the number text from.</param>
            /// <returns>The inner text of the single element in the chosen selection.</returns>
            protected override string GetNumberText(CsQuery.CQ node)
            {
                var paragraphs = node.Find("p");

                // Single() requires the chosen selection to hold exactly one element.
                var source = paragraphs.Any() ? paragraphs : node;

                return(source.Single().InnerText);
            }
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and counts the words in its text content.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The number of whitespace-separated words; 0 when the page has no text.</returns>
        static int CountWordsOnUrl(string url)
        {
            string html;

            using (var webClient = new WebClient())
            {
                html = webClient.DownloadString(url);
            }

            var text = new CsQuery.CQ(html).Text();

            // A null separator array splits on every whitespace character, and empty entries are
            // dropped, so line breaks, tabs and runs of spaces no longer distort the count.
            return(text.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries).Length);
        }
 /// <summary>
 /// Creates the arguments with every member set to an empty default instance.
 /// </summary>
 public PageCrawlCompletedArgs()
 {
     Url          = string.Empty;
     PageContent  = new PageContent();
     CQDocument   = new CsQuery.CQ();
     WebException = new WebException();
 }
Example #8
0
        /// <summary>
        /// Builds the server list from the table rows of "2.html", serializes it to "ssr.json",
        /// copies that file into <c>SSR_EXE_PATH</c> and passes the copy to <c>write_gui_config</c>.
        /// </summary>
        private void ToSSRJson()
        {
            var sFile = getFilePath("2.html");

            CsQuery.CQ dom  = System.IO.File.ReadAllText(sFile);
            var        oSSR = new SSR();

            oSSR.configs = new List <FreeSSR.Server>();
            foreach (var row in dom["tr"])
            {
                CsQuery.CQ rowDom = row.InnerHTML;
                var        tds    = rowDom["td"].ToList();

                // A usable row needs five cells: rating, server, port and the two credential cells.
                if (tds.Count < 5)
                {
                    continue;
                }

                // The first cell has the form "a/b". Rows where "b" or the port is not a number are
                // skipped, so a single malformed row no longer aborts the whole export.
                var ratingParts = tds[0].InnerText.Split('/');
                int rating;
                int port;
                if (ratingParts.Length < 2 ||
                    !int.TryParse(ratingParts[1], out rating) ||
                    !int.TryParse(tds[2].InnerText, out port))
                {
                    continue;
                }

                // T is the China Telecom line; the larger the value the better.
                if (rating <= 8)
                {
                    continue;
                }

                var server = tds[1].InnerText;
                if (string.IsNullOrWhiteSpace(server))
                {
                    continue;
                }

                var config = new FreeSSR.Server();
                config.server      = server;
                config.server_port = port;

                // The cipher and password columns can appear in either order; the cell that looks
                // like a cipher name is taken as the method.
                var p1 = tds[3].InnerText;
                var p2 = tds[4].InnerText;
                if (p1 == "rc4-md5" || p1 == "chacha20" || (p1 != null && p1.StartsWith("aes-")))
                {
                    config.method   = p1;
                    config.password = p2;
                }
                else
                {
                    config.method   = p2;
                    config.password = p1;
                }
                config.id = Guid.NewGuid().ToString("N");

                oSSR.configs.Add(config);
            }

            var json     = Newtonsoft.Json.JsonConvert.SerializeObject(oSSR);
            var jsonPath = getFilePath("ssr.json");

            System.IO.File.WriteAllText(jsonPath, json);
            var sOutFile = $"{SSR_EXE_PATH}\\ssr.json";

            // Overwrite in a single call instead of Exists + Delete + Copy.
            System.IO.File.Copy(jsonPath, sOutFile, true);
            write_gui_config(sOutFile);
        }
            /// <summary>
            /// Returns the trimmed inner text of <paramref name="node"/>, falling back to the trimmed
            /// inner text of its "p" descendant when the node's own text is empty.
            /// </summary>
            /// <param name="node">Selection expected to hold exactly one element.</param>
            /// <returns>The trimmed text of the node or of its single "p" descendant.</returns>
            protected override String GetNumberText(CsQuery.CQ node)
            {
                var ownText = node.Single().InnerText.Trim();

                return(ownText != String.Empty
                       ? ownText
                       : node.Find("p").Single().InnerText.Trim());
            }
Example #10
0
        /// <summary>
        /// Converts the given CsQuery DOM elements into a list of <c>ChatMessage</c> objects.
        /// Elements for which neither text nor an image source can be found are left out.
        /// </summary>
        ///
        /// <param name="messages">
        /// The DOM elements containing the chat messages
        /// </param>
        ///
        /// <param name="isOutMessage">
        /// True when the messages were sent by the user (outgoing), false when they were
        /// received from other users (incoming)
        /// </param>
        ///
        /// <returns>
        /// The converted messages; for incoming messages the list is passed through
        /// <c>ResolveAuthors</c> before it is returned
        /// </returns>
        private List <ChatMessage> FormatMessages(CsQuery.CQ messages, bool isOutMessage)
        {
            List <ChatMessage> formatted = messages
                                           .Select(element => BuildChatMessage(element.Cq(), isOutMessage))
                                           .Where(message => message != null)
                                           .ToList();

            // resolve author names for incoming messages
            if (!isOutMessage)
            {
                ResolveAuthors(formatted);
            }

            return(formatted);
        }

        /// <summary>
        /// Builds one <c>ChatMessage</c> from a single message element, or returns null when the
        /// element yields neither text nor an image source.
        /// </summary>
        private static ChatMessage BuildChatMessage(CsQuery.CQ message, bool isOutMessage)
        {
            // outgoing messages are always authored by the current user
            string author = isOutMessage
                            ? "Me"
                            : message.Find(".message-author .text-clickable").Text().Trim();

            string content = message.Find(".selectable-text").Text().Trim();

            // no text: assume the message holds a photo and fall back to its url
            // TODO: understand how to use the string to extract the image
            // TODO: add support for videos as well
            if (string.IsNullOrEmpty(content))
            {
                content = message.Find(".image-thumb > img").Attr("src");
            }

            // TODO: handle this properly, right now messages without any
            // text or url are skipped: null is returned here and filtered
            // out by the caller
            if (string.IsNullOrEmpty(content))
            {
                return(null);
            }

            var chatMessage = new ChatMessage();
            chatMessage.Author  = author;
            chatMessage.Content = content;

            return(chatMessage);
        }
Example #11
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and returns the links that point to the
        /// same host: root-relative links (prefixed with the host) plus links accepted by <c>HasSameHost</c>.
        /// </summary>
        /// <param name="url">Address of the page to scan.</param>
        /// <returns>The distinct links found; an empty list when anything fails.</returns>
        private IEnumerable <string> GetAllLinksForPage(string url)
        {
            try
            {
                string root = GetHost(url);

                CsQuery.CQ data;
                using (var client = new System.Net.WebClient())
                {
                    data = client.DownloadString(url);
                }

                var links = data["a"]
                            .Select((x) => x["href"])
                            .Where((x) => !String.IsNullOrWhiteSpace(x))
                            .ToList();

                // Materialize inside the try block: a deferred query would run after this method
                // returned, so exceptions raised while enumerating would bypass the catch below.
                return(links.Where((x) => x.StartsWith("/")).Select((x) => root + x)
                       .Union(links.Where((x) => HasSameHost(root, x)))
                       .ToList());
            }
            catch
            {
                // Best effort: any failure yields "no links".
                return(new List <string>(0));
            }
        }
Example #12
0
        /// <summary>
        /// Loads server configurations from the table rows of "2.html" and appends them to
        /// <c>oSSR.configs</c>. Rows that cannot be interpreted are skipped.
        /// </summary>
        /// <param name="oSSR">Object whose <c>configs</c> list receives the parsed servers.</param>
        private void ConfigFromHtml(SSR oSSR)
        {
            var sFile = getFilePath("2.html");

            CsQuery.CQ dom = System.IO.File.ReadAllText(sFile);

            // Without a target list nothing can be stored. The previous catch-all discarded every
            // row in this situation, so returning here keeps the observable result.
            if (oSSR == null || oSSR.configs == null)
            {
                return;
            }

            foreach (var row in dom["tr"])
            {
                CsQuery.CQ rowDom = row.InnerHTML;
                var        tds    = rowDom["td"].ToList();

                // A usable row needs five cells: rating, server, port and the two credential cells.
                if (tds.Count < 5)
                {
                    continue;
                }

                // The first cell has the form "a/b"; rows where "b" or the port is not a number
                // are skipped explicitly instead of relying on a swallowed exception.
                var ratingParts = (tds[0].InnerText ?? string.Empty).Split('/');
                int rating;
                int port;
                if (ratingParts.Length < 2 ||
                    !int.TryParse(ratingParts[1], out rating) ||
                    !int.TryParse(tds[2].InnerText, out port))
                {
                    continue;
                }

                // T is the China Telecom line; the larger the value the better.
                if (rating <= 8)
                {
                    continue;
                }

                var server = tds[1].InnerText;
                if (string.IsNullOrWhiteSpace(server))
                {
                    continue;
                }

                var config = new Server();
                config.server      = server;
                config.server_port = port;

                // The cipher and password columns can appear in either order; the cell that looks
                // like a cipher name is taken as the method.
                var p1 = tds[3].InnerText;
                var p2 = tds[4].InnerText;
                if (p1 == "rc4-md5" || p1 == "chacha20" || (p1 != null && p1.StartsWith("aes-")))
                {
                    config.method   = p1;
                    config.password = p2;
                }
                else
                {
                    config.method   = p2;
                    config.password = p1;
                }
                config.id = Guid.NewGuid().ToString("N");

                oSSR.configs.Add(config);
            }
        }
Example #13
0
        /// <summary>
        /// Creates the document from inline markup or from a template file.
        /// </summary>
        /// <param name="templateFilePath">
        /// Either the markup itself (any value longer than 255 characters or containing '&lt;')
        /// or the path of a file whose content is parsed. Null or empty yields an empty document.
        /// </param>
        public DryHtml(string templateFilePath = "")
        {
            if (string.IsNullOrEmpty(templateFilePath))
            {
                // No template supplied. A null argument used to fail with a NullReferenceException.
                _dom = new CsQuery.CQ();
            }
            else if (templateFilePath.Length > 255 || templateFilePath.Contains('<'))
            {
                // Too long for a path, or contains a tag opener: treat the value as markup.
                _dom = templateFilePath;
            }
            else
            {
                _dom = System.IO.File.ReadAllText(templateFilePath);
            }
        }
            /// <summary>
            /// Builds the description from the "h3" caption and, when present, the parsed child
            /// nodes of the ".au-accordion__target" element.
            /// </summary>
            /// <param name="descriptionNode">Selection containing the caption and optional details.</param>
            /// <returns>
            /// The wrapped caption alone, or the wrapped caption followed by a line break and the
            /// parsed detail nodes joined by line breaks.
            /// </returns>
            protected override String GetDescription(CsQuery.CQ descriptionNode)
            {
                var heading = descriptionNode.Find("h3").Single().InnerText.Trim();

                // An empty heading means the text sits in a nested span instead.
                if (heading == String.Empty)
                {
                    heading = descriptionNode.Find("h3 span").Single().InnerText.Trim();
                }

                var wrappedHeading = Common.WrapText(heading);
                var details        = descriptionNode.Find(".au-accordion__target");

                if (!details.Any())
                {
                    return(wrappedHeading);
                }

                var detailLines = details.Single().ChildNodes.Select(child => Common.ParseNode(child));
                var detailText  = String.Join(Environment.NewLine, detailLines);

                return(String.Format("{0}{1}{2}", wrappedHeading, Environment.NewLine, detailText));
            }
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and extracts the bulletins listed in the
        /// tables whose <c>title</c> attribute names one of the known categories, keeping only
        /// entries dated yesterday or later.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>
        /// The extracted news items. Rows that cannot be interpreted are skipped; when the download
        /// or parsing fails as a whole, the error is reported through <c>ShowNotify</c> and an empty
        /// list is returned.
        /// </returns>
        internal static List <Model.News> SendRequestToHP(string url)
        {
            try
            {
                string rssContent;
                using (var wc = new WebClient())
                {
                    // NOTE(review): this assignment is process-wide and also enables TLS 1.0/1.1 — confirm it is still required.
                    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls |
                                                           SecurityProtocolType.Tls11 |
                                                           SecurityProtocolType.Tls12;
                    rssContent = wc.DownloadString(url);
                }

                var        categories = @"3COM Security Bulletins
                            3rd Party Software Security Bulletins
                            HP General SW Security Bulletins
                            HP Hardware and Firmware Security Bulletins
                            HP MPE/iX Security Bulletins
                            Multi-Platform Software Security Bulletins
                            HP NonStop Servers Security Bulletins
                            HP OpenVMS Security Bulletins
                            ProCurve Security Bulletins
                            HP Storage SW Security Bulletins
                            HP Tru64 UNIX Security Bulletins


                            HP-UX UNIX Security Bulletins";
                CsQuery.CQ dom        = rssContent;
                var        tables     = dom["table"].Has("tr");

                // Built once instead of once per row.
                var dateRegex      = new System.Text.RegularExpressions.Regex(@"\d{4}\/\d{1,2}\/\d{1,2}");
                var oldestAccepted = DateTime.Today.AddDays(-1);

                var news = new List <Model.News>();
                foreach (var table in tables)
                {
                    // An empty or blank title is a substring of every text, so it has to be rejected explicitly.
                    var title = table.Attributes["title"];
                    if (string.IsNullOrWhiteSpace(title) || !categories.Contains(title))
                    {
                        continue;
                    }

                    var body = table.ChildNodes.FirstOrDefault(cn => cn.NodeName.Contains("TBODY"));
                    if (body == null)
                    {
                        continue;
                    }

                    foreach (var tbItems in body.ChildNodes)
                    {
                        // Cells 0, 1 and 3 are read below, so at least four non-empty cells are needed.
                        var row = tbItems.ChildNodes?.Where(cn => cn?.FirstChild != null).ToList();
                        if (row == null || row.Count < 4)
                        {
                            continue;
                        }

                        // A row without a parsable yyyy/M/d date is skipped; previously it threw and
                        // every item collected so far was lost to the outer catch.
                        var      dateMatch = dateRegex.Match(row[0].InnerHTML ?? string.Empty);
                        DateTime date;
                        if (!dateMatch.Success ||
                            !DateTime.TryParseExact(
                                dateMatch.Value,
                                "yyyy/M/d",
                                System.Globalization.CultureInfo.InvariantCulture,
                                System.Globalization.DateTimeStyles.None,
                                out date))
                        {
                            continue;
                        }

                        if (date < oldestAccepted)
                        {
                            continue;
                        }

                        // The fourth cell is expected to start with the link to the bulletin.
                        var anchor = row[3].ChildNodes.FirstOrDefault() as CsQuery.Implementation.HtmlAnchorElement;
                        if (anchor == null)
                        {
                            continue;
                        }

                        news.Add(new Model.News()
                        {
                            Identifier  = row[1].InnerHTML,
                            PublishDate = date,
                            Title       = anchor.InnerHTML,
                            Description = anchor.InnerHTML,
                            Url         = "http://support.hpe.com" + anchor.Href,
                            Supplier    = Model.NewsSupplier.HP,
                            //NewsCategory = Model.NewsCategory.Advisory
                        });
                    }
                }
                if (news.Any())
                {
                    Console.WriteLine($"[+] HP: {news.Count}");
                }

                return(news);
            }
            catch (Exception exception)
            {
                ShowNotify(exception.Message, exception.StackTrace, "HP");
                return(new List <Model.News>());
            }
        }
Example #16
0
 /// <summary>
 /// Returns the <c>src</c> attribute of every "img" under <paramref name="node"/>, excluding
 /// images matching ".pdd-inline-sign__icon" and images matching "p span img".
 /// </summary>
 /// <param name="node">Selection to search for images.</param>
 /// <returns>One entry per remaining image, in selection order.</returns>
 private String[] GetUrls(CsQuery.CQ node)
 {
     var images = node.Find("img")
                      .Not(".pdd-inline-sign__icon")
                      .Not("p span img");

     var sources = images.Select(image => image.GetAttribute("src"));

     return(sources.ToArray());
 }
Example #17
0
 /// <summary>
 /// Produces the description text for the given selection; the extraction itself is defined by each subclass.
 /// </summary>
 /// <param name="descriptionNode">CsQuery selection the implementation reads from.</param>
 /// <returns>The description text built by the override.</returns>
 protected abstract String GetDescription(CsQuery.CQ descriptionNode);
Example #18
0
 /// <summary>
 /// Produces the number text for the given selection; the extraction itself is defined by each subclass.
 /// </summary>
 /// <param name="node">CsQuery selection the implementation reads from.</param>
 /// <returns>The text returned by the override.</returns>
 protected abstract String GetNumberText(CsQuery.CQ node);
Example #19
0
            /// <summary>
            /// Builds the description from every element under <paramref name="descriptionNode"/>
            /// whose <c>style</c> attribute equals the fixed Arial / 14px / black declaration.
            /// </summary>
            /// <param name="descriptionNode">Selection to search.</param>
            /// <returns>The parsed matches joined by line breaks.</returns>
            protected override string GetDescription(CsQuery.CQ descriptionNode)
            {
                const string selector = @"[style=""font-family: Arial, Helvetica, sans-serif; font-size: 14px; color: black;""]";

                var parsed = descriptionNode.Find(selector).Select(match => Common.ParseNode(match));

                return(String.Join(Environment.NewLine, parsed));
            }
 /// <summary>
 /// Gets the top header from the apmex site document by applying
 /// <c>ApmexConst.APMEX_TITLE_QUERY</c> to it.
 /// </summary>
 /// <param name="siteDom">Parsed site document the selector is evaluated against.</param>
 /// <returns>The selection produced by the selector.</returns>
 private CsQuery.CQ GetExtractTitleFromApmex(CsQuery.CQ siteDom)
 {
     CsQuery.CQ header = siteDom[ApmexConst.APMEX_TITLE_QUERY];

     return(header);
 }
Example #21
0
 /// <summary>
 /// Creates the document from <paramref name="template"/> after <c>Replace</c> has been applied
 /// to it with <paramref name="model"/>.
 /// </summary>
 /// <param name="template">Template text that is passed through <c>Replace</c>.</param>
 /// <param name="model">Object handed to <c>Replace</c>.</param>
 public DryHtml(string template, object model)
 {
     // The former "_dom = new CsQuery.CQ()" was a dead store: it was overwritten on the next line.
     _dom = template.Replace(model);
 }
Example #22
0
        /// <summary>
        /// Collects the URLs of the <c>img</c> elements in the given HTML. Sources that do not
        /// start with <c>http://</c> or <c>https://</c> are prefixed with <c>http://blog2.cnool.net</c>.
        /// </summary>
        /// <param name="sHtmlText">HTML to scan for images.</param>
        /// <returns>
        /// One URL per image that has a usable source, in document order. Images without a
        /// <c>src</c> and inline <c>data:image/</c> sources are omitted, so the array contains no
        /// null or repeated filler entries.
        /// </returns>
        public static string[] GetHtmlImageUrlList(string sHtmlText)
        {
            const string host = "http://blog2.cnool.net";

            CsQuery.CQ cq = sHtmlText;

            // Previously a data: image re-emitted the URL of the preceding image, and an image
            // without src left a null slot at the end of the array. Both are now simply skipped.
            return(cq["img"]
                   .Select(img => img.GetAttribute("src"))
                   .Where(src => !string.IsNullOrWhiteSpace(src))
                   .Where(src => !src.StartsWith("data:image/", StringComparison.Ordinal))
                   .Select(src => src.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                                  src.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                                  ? src
                                  : host + src)
                   .ToArray());
        }