/// <summary>
        /// Builds the URL of the next result page by incrementing the
        /// <c>pageNo</c> query parameter of the current URL.
        /// </summary>
        /// <returns>
        /// The next page URL, or <c>null</c> when <c>IsEmptyPage()</c> reports true,
        /// the current URL is blank, or the page has no "下一页" (next page) link.
        /// </returns>
        protected override string ParseNextUrl()
        {
            if (this.IsEmptyPage())
            {
                return(null);
            }

            var currentUrl = this.CurrentUrl;

            if (StringExtension.IsNullOrWhiteSpace(currentUrl))
            {
                return(null);
            }

            // The link target itself is not used; its presence only signals that another page exists.
            var nextpageUrl = HtmlDocumentHelper.GetValueByXPath(this.HtmlSource, @"//a[text()='下一页']/@href");

            if (StringExtension.IsNullOrWhiteSpace(nextpageUrl))
            {
                return(null);
            }

            string baseUrl;
            NameValueCollection collection;

            Url.ParseUrl(currentUrl, out baseUrl, out collection);

            var pageNo = int.Parse(collection[@"pageNo"]);

            // Write back to the same key that was read. The previous code used the
            // literal "@pageNo" (the '@' was inside the quotes), which added a new
            // parameter and left the real page number unchanged.
            collection[@"pageNo"] = $"{pageNo + 1}";

            return(Url.CombinUrl(baseUrl, collection));
        }
        public async Task BankIdAuthentication_Login_Returns_Form_With_Resolved_Cancel_Url()
        {
            // Arrange: stub the options protector so that every Unprotect call yields
            // login options carrying the app-relative value "~/cru".
            const string loginPath = "/BankIdAuthentication/Login?returnUrl=%2F&loginOptions=X&orderRef=Y";

            var loginOptions  = new BankIdLoginOptions(new List<string>(), null, false, true, false, false, "~/cru", DefaultStateCookieName);
            var protectorStub = new Mock<IBankIdLoginOptionsProtector>();

            protectorStub
                .Setup(p => p.Unprotect(It.IsAny<string>()))
                .Returns(loginOptions);

            using var server = CreateServer(
                bankId =>
                {
                    bankId.UseSimulatedEnvironment()
                          .AddSameDevice();
                },
                DefaultAppConfiguration(async httpContext =>
                {
                    await httpContext.ChallengeAsync(BankIdDefaults.SameDeviceAuthenticationScheme);
                }),
                serviceCollection =>
                {
                    serviceCollection.AddTransient(provider => protectorStub.Object);
                });

            // Act: request the login page with a state cookie attached.
            var loginRequest = CreateRequestWithStateCookie(server, loginPath);
            var response     = await loginRequest.GetAsync();

            // Assert: the page renders and the hidden cancel field holds the resolved "/cru".
            Assert.Equal(HttpStatusCode.OK, response.StatusCode);

            var page = await HtmlDocumentHelper.FromContent(response.Content);

            Assert.Equal("/cru", page.GetInputValue("input[name='CancelReturnUrl']"));
        }
 /// <summary>
 /// Awaits <c>restClient.GetAsync(uri)</c> and passes the result to
 /// <c>HtmlDocumentHelper.CreateNew</c>.
 /// </summary>
 /// <param name="restClient">Client used to issue the GET request.</param>
 /// <param name="uri">Address to fetch.</param>
 /// <returns>The document produced by <c>HtmlDocumentHelper.CreateNew</c>.</returns>
 public static async Task<HtmlDocument> CreateHtmlDocumentAsync(this IRestClient restClient, Uri uri)
 {
     var fetched = await restClient.GetAsync(uri);

     return HtmlDocumentHelper.CreateNew(fetched);
 }
        public async Task BankIdAuthentication_Login_Returns_Form_And_Status()
        {
            // Arrange: simulated same-device BankId server using the shared protector mock.
            const string loginPath = "/BankIdAuthentication/Login?returnUrl=%2F&loginOptions=X&orderRef=Y";

            using var server = CreateServer(
                bankId =>
                {
                    bankId.UseSimulatedEnvironment()
                          .AddSameDevice();
                },
                DefaultAppConfiguration(async httpContext =>
                {
                    await httpContext.ChallengeAsync(BankIdDefaults.SameDeviceAuthenticationScheme);
                }),
                serviceCollection =>
                {
                    serviceCollection.AddTransient(provider => _bankIdLoginOptionsProtector.Object);
                });

            // Act: request the login page with a state cookie attached.
            var loginRequest = CreateRequestWithStateCookie(server, loginPath);
            var response     = await loginRequest.GetAsync();

            // Assert: the page renders successfully ...
            Assert.Equal(HttpStatusCode.OK, response.StatusCode);

            var page = await HtmlDocumentHelper.FromContent(response.Content);

            // ... contains the login form, the status container and the QR code image ...
            Assert.NotNull(page.GetElement<IHtmlFormElement>("form[id='bankIdLoginForm']"));
            Assert.NotNull(page.GetElement<IHtmlDivElement>("div[id='bankIdLoginStatus']"));
            Assert.NotNull(page.GetElement<IHtmlImageElement>("img.qr-code-image"));

            // ... and its hidden inputs carry the expected values.
            Assert.Equal("/", page.GetInputValue("input[name='ReturnUrl']"));
            Assert.Equal("/", page.GetInputValue("input[name='CancelReturnUrl']"));
            Assert.Equal("X", page.GetInputValue("input[name='LoginOptions']"));
            Assert.Equal("true", page.GetInputValue("input[name='AutoLogin']"));
        }
 /// <summary>
 /// Awaits <c>restClient.PostAsync(uri, content)</c> and passes the result to
 /// <c>HtmlDocumentHelper.CreateNew</c>.
 /// </summary>
 /// <param name="restClient">Client used to issue the POST request.</param>
 /// <param name="uri">Address to post to.</param>
 /// <param name="content">Body sent with the request.</param>
 /// <returns>The document produced by <c>HtmlDocumentHelper.CreateNew</c>.</returns>
 public static async Task<HtmlDocument> CreateHtmlDocumentAsync(this IRestClient restClient, Uri uri, HttpContent content)
 {
     var posted = await restClient.PostAsync(uri, content);

     return HtmlDocumentHelper.CreateNew(posted);
 }
Beispiel #6
0
        /// <summary>
        /// Collects the tag names listed under a "Tags" heading in a blog post.
        /// </summary>
        /// <param name="doc">Parsed HTML of the blog post.</param>
        /// <returns>
        /// One entry per matching link, whitespace-normalized and trimmed;
        /// an empty list when no matching links are found.
        /// </returns>
        private static List<string> GetTagsFromPostContent(HtmlDocument doc)
        {
            // Expected markup (whitespace inside the links may be irregular):
            //
            //    <h3>
            //      Tags</h3>
            //    <ul>
            //      <li><a href="..." rel="tag">    My
            //        System    </a></li>
            //      <li><a href="..." rel="tag">Toolbox</a></li>
            //    </ul>
            //
            // For that markup the result is ["My System", "Toolbox"].
            var anchors = doc.DocumentNode.SelectNodes(
                "//h3[normalize-space() = 'Tags']/following-sibling::ul/li/a");

            var result = new List<string>();

            // The original treats a null node set as "no tags".
            if (anchors == null)
            {
                return result;
            }

            foreach (var anchor in anchors)
            {
                var normalized = HtmlDocumentHelper.NormalizeWhitespace(anchor.InnerText);

                result.Add(normalized.Trim());
            }

            return result;
        }
        /// <summary>
        ///     Extracts the number from the pager summary text "共N条记录".
        /// </summary>
        /// <remarks>
        ///     NOTE(review): the method name says "page amount", but the matched text
        ///     looks like a total record count — confirm how callers interpret it.
        /// </remarks>
        /// <param name="content">HTML of the page; may be <c>null</c>.</param>
        /// <returns>
        ///     The parsed number, or -1 when <paramref name="content"/> is null, the
        ///     pager's <c>em</c> element is missing, or its text does not match.
        /// </returns>
        private int ParseAmountPage(string content)
        {
            const int NotFound = -1;

            if (content == null)
            {
                return NotFound;
            }

            // Sample pager markup this expects:
            //
            //   <div class="jPage">
            //     <em>共104条记录</em>
            //     <span>上一页</span>
            //     <a class="current">1</a>
            //     <a href="//mall.jd.com/view_search-...-1-2-24.html?isGlobalSearch=0">2</a>
            //     ...
            //     <a href="//mall.jd.com/view_search-...-1-2-24.html?isGlobalSearch=0">下一页</a>
            //   </div>
            var summaryNode = HtmlDocumentHelper
                              .CreateNavigator(content)
                              .SelectSingleNode(@"//div[@class='jPage']/em");

            if (summaryNode == null)
            {
                return NotFound;
            }

            var countMatch = Regex.Match(summaryNode.Value, @"(?<=共)\d+(?=条记录)");

            if (!countMatch.Success)
            {
                return NotFound;
            }

            return int.Parse(countMatch.Value);
        }
        /// <summary>
        /// Heuristically decides whether the page is a product listing: returns true
        /// once eight links whose href starts with <c>//item.jd.com/&lt;digits&gt;.html</c>
        /// have been seen.
        /// </summary>
        /// <param name="webContent">HTML of the page to inspect.</param>
        /// <returns>
        /// <c>true</c> when at least eight such links exist; <c>false</c> otherwise,
        /// including when <paramref name="webContent"/> is null or empty.
        /// </returns>
        private bool GuessIsSearchWebContent(string webContent)
        {
            if (string.IsNullOrEmpty(webContent))
            {
                return false;
            }

            var productLinkCount = 0;
            var anchors          = HtmlDocumentHelper.CreateNavigator(webContent).Select(@"//a");

            foreach (XPathNavigator anchor in anchors)
            {
                var target = anchor.GetAttribute(@"href", string.Empty);

                if (target != null
                    && Regex.IsMatch(target, @"^//item\.jd\.com/\d+\.html", RegexOptions.IgnoreCase))
                {
                    productLinkCount++;
                }

                // Stop scanning as soon as the threshold is reached.
                if (productLinkCount >= 8)
                {
                    return true;
                }
            }

            return false;
        }
Beispiel #9
0
 /// <summary>
 /// Reads the locally saved copy of a list page and appends one
 /// { "personUrl", "personName" } dictionary per "people" list item.
 /// </summary>
 /// <param name="localDir">Directory holding the saved page files.</param>
 /// <param name="listPageUrl">URL of the list page; used to locate the local file.</param>
 /// <param name="allPersonPageUrlInfos">Accumulator that receives the extracted entries.</param>
 /// <exception cref="Exception">
 /// Any failure is logged through <c>RunPage</c> and rethrown wrapped, with the list page URL in the message.
 /// </exception>
 private void GetPersonPageUrls(string localDir, string listPageUrl, List<Dictionary<string, string>> allPersonPageUrlInfos)
 {
     try
     {
         string savedPagePath = this.RunPage.GetFilePath(listPageUrl, localDir);
         HtmlAgilityPack.HtmlDocument listDocument = HtmlDocumentHelper.Load(savedPagePath);
         HtmlNodeCollection resultItems = listDocument.DocumentNode.SelectNodes("//ol[@class=\"search-results\"]/li");

         foreach (HtmlNode resultItem in resultItems)
         {
             // Only list items whose class attribute mentions "people" are person entries.
             if (!resultItem.GetAttributeValue("class", "").Contains("people"))
             {
                 continue;
             }

             HtmlNode profileLink = resultItem.SelectSingleNode("./div[@class=\"bd\"]/h3/a");
             string personUrl = CommonUtil.UrlDecodeSymbolAnd(profileLink.GetAttributeValue("href", ""));
             string personName = profileLink.InnerText.Trim();

             allPersonPageUrlInfos.Add(new Dictionary<string, string>
             {
                 { "personUrl", personUrl },
                 { "personName", personName }
             });
         }
     }
     catch (Exception ex)
     {
         this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
         throw new Exception("解析列表页出错, listPageUrl = +" + listPageUrl, ex);
     }
 }
        /// <summary>
        ///     Extracts the products listed in the page's <c>ul/li</c> elements.
        /// </summary>
        /// <param name="htmlSource">The HTML source.</param>
        /// <param name="listOnly">
        ///     When true, only the list is parsed and <c>UpdateResultsPrices</c> is skipped
        ///     (per the original author's note, prices need additional network access).
        /// </param>
        /// <returns>One result per list item from which a numeric SKU could be read.</returns>
        private IResut[] ParseCurrentItems(string htmlSource, bool listOnly = false)
        {
            const string SkuIdKey = "ProductSku";

            var products = new List<IResut>();
            var listItems = HtmlDocumentHelper.CreateNavigator(htmlSource).Select(@"//ul/li");

            foreach (XPathNavigator listItem in listItems)
            {
                // Title and link normally sit in div.jDesc; fall back to div.jTitle.
                var productName = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jDesc']/a/text()");
                var productUrl  = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jDesc']//@href");

                if (string.IsNullOrEmpty(productName))
                {
                    productName = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jTitle']/a/text()");
                    productUrl  = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jTitle']//@href");
                }

                var productImage = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jPic']//@original");

                // The SKU is the run of digits immediately before ".html" in the product link.
                var skuMatch = Regex.Match(productUrl, @"(?<=/)\d+(?=\.html)");
                var sku      = string.Empty;

                if (skuMatch.Success)
                {
                    sku = skuMatch.Value;
                }

                if (string.IsNullOrEmpty(sku))
                {
                    // Items without a SKU are skipped.
                    continue;
                }

                // Review data.
                var productComments = ParseComments(listItem);

                IResut product = new Resut();

                product[SkuIdKey]          = sku;
                product["ShopId"]          = ShopUrl;
                product["ProductName"]     = productName;
                product["ProductUrl"]      = productUrl;
                product["ProductImage"]    = productImage;
                product["ProductComments"] = productComments;

                products.Add(product);
            }

            if (listOnly == false)
            {
                this.UpdateResultsPrices(products, SkuIdKey);
            }

            return products.ToArray();
        }
        /// <summary>
        /// Extracts information for every product listed on the current page.
        /// </summary>
        /// <param name="htmlSource">HTML of the current page.</param>
        /// <param name="listOnly">When true, <c>UpdateResultsPrices</c> is not called.</param>
        /// <returns>One result per <c>ul/li</c> element from which a numeric SKU could be read.</returns>
        private IResut[] ParseCurrentItems(string htmlSource, bool listOnly = false)
        {
            const string SkuIdKey   = "ProductSku";
            var          resultList = new List <IResut>();

            // XPath navigator over the whole page.
            var navigator = HtmlDocumentHelper.CreateNavigator(htmlSource);
            var iterator  = navigator.Select(@"//ul/li");

            foreach (XPathNavigator item in iterator)
            {
                // Title and link normally sit in div.jDesc; fall back to div.jTitle.
                var title = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jDesc']/a/text()");
                var href  = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jDesc']//@href");
                if (string.IsNullOrEmpty(title))
                {
                    title = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jTitle']/a/text()");
                    href  = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jTitle']//@href");
                }

                // Prefer the @original attribute and fall back to @src.
                // Fix: the fallback used to query the whole page (htmlSource) instead of the
                // current item, so every item without @original got the page's first image.
                // The emptiness check is also null-safe now.
                var imgSrc = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jPic']//@original");
                if (string.IsNullOrEmpty(imgSrc))
                {
                    imgSrc = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jPic']//@src");
                }

                // Fix: the dot before "html" is escaped so it only matches a literal '.'.
                var skuMatchResults = Regex.Match(href, @"(?<=/)\d+(?=\.html)");
                var sku             = skuMatchResults.Success ? skuMatchResults.Value : string.Empty;

                if (string.IsNullOrEmpty(sku))
                {
                    // Items without a SKU are skipped.
                    continue;
                }

                // Review data.
                var comments = ParseComments(item);

                IResut resut = new Resut();

                resut[SkuIdKey]          = sku;
                resut["ShopUrl"]         = this.ShopUrl;
                resut["ProductName"]     = title;
                resut["ProductUrl"]      = href;
                resut["ProductImage"]    = imgSrc;
                resut["ProductComments"] = comments;
                resultList.Add(resut);
            }

            if (!listOnly)
            {
                this.UpdateResultsPrices(resultList, SkuIdKey);
            }

            return(resultList.ToArray());
        }
Beispiel #12
0
        /// <summary>
        /// Starts parsing the page. Currently unfinished: after the shop score step
        /// it always throws <see cref="NotImplementedException"/>.
        /// </summary>
        /// <param name="webContent">Content of the web page.</param>
        /// <returns>Never returns normally in the current implementation.</returns>
        /// <exception cref="NotImplementedException">Always thrown after the shop score step.</exception>
        public IResut Parse(string webContent)
        {
            IResut shopResult = new Resut();
            var pageNavigator = HtmlDocumentHelper.CreateNavigator(webContent);

            ParseShopScoreResult(shopResult, pageNavigator);

            throw new NotImplementedException();
        }
Beispiel #13
0
        /*        /// <summary>
         * ///     返回自营店的店铺编号列表
         * /// </summary>
         * /// <param name="shopDictionary">The shop dictionary.</param>
         * /// <returns></returns>
         * private string[] GetSelfSupportShopIds(IDictionary<string, IResut> shopDictionary)
         * {
         *  var shopIdList = new List<string>();
         *
         *  foreach (var shopItem in shopDictionary)
         *  {
         *      var shopId = shopItem.Key;
         *      if (shopItem.Value.GetStringValue(@"vender_type") == "0")
         *      {
         *          shopIdList.Add(shopId);
         *      }
         *  }
         *
         *  return shopIdList.ToArray();
         * }*/

        /// <summary>
        ///     Evaluates an XPath against the HTML and returns the values of the selected nodes,
        ///     passed through <c>DictionaryHelper.Distinct</c>.
        /// </summary>
        /// <param name="htmlSource">The HTML source.</param>
        /// <param name="xpath">The XPath selecting the nodes whose values are wanted.</param>
        /// <returns>The array returned by <c>DictionaryHelper.Distinct</c> for the node values.</returns>
        private string[] ParseIdFromXpath(string htmlSource, string xpath)
        {
            var selection = HtmlDocumentHelper.CreateNavigator(htmlSource).Select(xpath);
            var nodes     = HtmlDocumentHelper.CopyNodeToArray(selection);
            var values    = Array.ConvertAll(nodes, node => node.Value);

            return DictionaryHelper.Distinct(values);
        }
        /// <summary>
        /// Builds the AJAX search URL from the page's embedded <c>var params = {...}</c> JSON,
        /// the render structure values and hidden input fields.
        /// </summary>
        /// <param name="webContent">HTML of the page containing the search parameters.</param>
        /// <param name="renderStructure">
        /// Render attributes keyed by name; <c>m_render_is_search</c> is read with the indexer
        /// and selects the base URL.
        /// </param>
        /// <returns>The base URL combined with the assembled query parameters.</returns>
        /// <exception cref="System.NotSupportedException">
        /// The page does not contain a <c>var params = {...}</c> block.
        /// </exception>
        private string BuildAjaxSearchUrl(string webContent, IDictionary <string, string> renderStructure)
        {
            var matchResults = Regex.Match(webContent, "(?<=var params = ){[^}]+}");

            if (!matchResults.Success)
            {
                throw new NotSupportedException("无法从页面中解析出搜索参数");
            }

            var jObject   = JObject.Parse(matchResults.Value);
            var navigator = HtmlDocumentHelper.CreateNavigator(webContent);

            var collection = Url.CreateQueryCollection();

            // Values come from three places: the params JSON, the render structure
            // dictionary, and hidden <input> fields on the page.
            collection[@"appId"]            = ReadJsonFunc(jObject, "appId");
            collection[@"orderBy"]          = "5";
            collection[@"pageNo"]           = "1";
            collection[@"direction"]        = "1";
            collection[@"categoryId"]       = ReadJsonFunc(jObject, @"categoryId");
            collection[@"pageSize"]         = @"24";
            collection[@"pagePrototypeId"]  = ReadJsonFunc(jObject, @"pagePrototypeId");
            collection[@"pageInstanceId"]   = ReadHtmlFunc(renderStructure, @"m_render_pageInstance_id");
            collection[@"moduleInstanceId"] = ReadHtmlFunc(renderStructure, "m_render_instance_id");
            collection[@"prototypeId"]      = ReadHtmlFunc(renderStructure, @"m_render_prototype_id");
            collection[@"templateId"]       = ReadHtmlFunc(renderStructure, @"m_render_template_id");
            collection[@"layoutInstanceId"] = ReadHtmlFunc(renderStructure, @"m_render_layout_instance_id");
            collection[@"origin"]           = ReadHtmlFunc(renderStructure, @"m_render_origin");
            collection[@"shopId"]           = ReadInputFunc(navigator, @"shop_id");

            // Fix: the key was misspelled "verderId". The value is read from the
            // "vender_id" input, so the parameter name is "venderId".
            collection[@"venderId"]         = ReadInputFunc(navigator, @"vender_id");

            collection[@"_"] = $"{JsCodeHelper.GetDateTime()}";

            var baseUrl = renderStructure[@"m_render_is_search"] == "true"
                            ? @"http://module-jshop.jd.com/module/getModuleHtml.html"
                            : @"http://mall.jd.com/view/getModuleHtml.html";

            return(Url.CombinUrl(baseUrl, collection));
        }
        /// <summary>
        ///     Builds the search AJAX URL.
        /// </summary>
        /// <param name="webContent">Content of the web page; must contain a <c>var params = {...}</c> block.</param>
        /// <param name="renderStructure">
        ///     Render attributes keyed by name; <c>m_render_is_search</c> is read with the indexer
        ///     and selects the base URL.
        /// </param>
        /// <returns>The base URL combined with the assembled query parameters.</returns>
        /// <exception cref="System.NotSupportedException">The search parameters cannot be found in the page.</exception>
        private string BuildAjaxSearchUrl(string webContent, IDictionary <string, string> renderStructure)
        {
            var paramsMatch = Regex.Match(webContent, "(?<=var params = ){[^}]+}");

            if (paramsMatch.Success == false)
            {
                throw new NotSupportedException("无法从页面中解析出搜索参数");
            }

            // Example of the matched JSON:
            // {"appId":"435517","orderBy":"5","direction":"0","categoryId":"0","pageSize":"24","venderId":"1000004373","isGlobalSearch":"0","maxPrice":"0","pagePrototypeId":"17","pageNo":"1","shopId":"1000004373","minPrice":"0"}
            var searchParams  = JObject.Parse(paramsMatch.Value);
            var pageNavigator = HtmlDocumentHelper.CreateNavigator(webContent);

            // Three value sources: the params JSON, the render structure dictionary,
            // and hidden <input> fields on the page.
            System.Func<string, string> fromJson =
                name => JsonHelper.TryReadJobjectValue(searchParams, name, (string)null);

            System.Func<string, string> fromRenderStructure = name =>
            {
                string found;
                renderStructure.TryGetValue(name, out found);
                return found;
            };

            System.Func<string, string> fromInput =
                id => HtmlDocumentHelper.GetNodeValue(pageNavigator, $@"//input[@id='{id}']/@value");

            var query = Url.CreateQueryCollection();

            query[@"appId"]            = fromJson("appId");
            query[@"orderBy"]          = "5";
            query[@"pageNo"]           = "1";
            query[@"direction"]        = "1";
            query[@"categoryId"]       = fromJson(@"categoryId");
            query[@"pageSize"]         = @"24";
            query[@"pagePrototypeId"]  = fromJson(@"pagePrototypeId");
            query[@"pageInstanceId"]   = fromRenderStructure(@"m_render_pageInstance_id");
            query[@"moduleInstanceId"] = fromRenderStructure(@"m_render_instance_id");
            query[@"prototypeId"]      = fromRenderStructure(@"m_render_prototype_id");
            query[@"templateId"]       = fromRenderStructure(@"m_render_template_id");
            query[@"layoutInstanceId"] = fromRenderStructure(@"m_render_layout_instance_id");
            query[@"origin"]           = fromRenderStructure(@"m_render_origin");
            query[@"shopId"]           = fromInput(@"shop_id");
            query[@"venderId"]         = fromInput(@"vender_id");

            // No "callback" parameter is sent; per the original author's note the
            // endpoint then returns a plain JSON structure.
            query[@"_"] = $"{JsCodeHelper.GetDateTime()}";

            string endpoint;

            if (renderStructure[@"m_render_is_search"] == "true")
            {
                endpoint = @"http://module-jshop.jd.com/module/getModuleHtml.html";
            }
            else
            {
                endpoint = @"http://mall.jd.com/view/getModuleHtml.html";
            }

            return Url.CombinUrl(endpoint, query);
        }
        /// <summary>
        /// Special-case handling: builds an <c>advance_search</c> URL from three hidden inputs
        /// on the shop page, stores it in <c>CurrentUrl</c>, and downloads it.
        /// </summary>
        /// <param name="shopUrl">URL of the shop page.</param>
        /// <returns>Content fetched from the constructed search URL.</returns>
        private string GetSpecialSearchPageContent(string shopUrl)
        {
            var shopPage = this.GetWebContent(shopUrl);

            // Hidden inputs supply the three identifiers embedded in the search URL.
            var appId    = HtmlDocumentHelper.GetNodeValue(shopPage, @"//input[@id='pageInstance_appId']/@value");
            var venderId = HtmlDocumentHelper.GetNodeValue(shopPage, @"//input[@id='vender_id']/@value");
            var shopId   = HtmlDocumentHelper.GetNodeValue(shopPage, @"//input[@id='shop_id']/@value");

            this.CurrentUrl = $"http://mall.jd.com/advance_search-{appId}-{venderId}-{shopId}-5-0-0-1-1-24.html";

            return this.GetWebContent(this.CurrentUrl);
        }
Beispiel #17
0
        /// <summary>
        /// Reads the locally saved copy of a Baidu result page and appends one
        /// { "personUrl", "personName" } dictionary for every result whose displayed URL
        /// contains ".linkedin.com/in/" and whose text contains <paramref name="keyWords"/>
        /// (compared after lower-casing both sides).
        /// </summary>
        /// <param name="localDir">Directory holding the saved page files.</param>
        /// <param name="listPageUrl">URL of the list page; used to locate the local file.</param>
        /// <param name="allPersonPageUrlInfos">Accumulator that receives the extracted entries.</param>
        /// <param name="keyWords">Text that must appear in a result for it to be kept.</param>
        /// <exception cref="Exception">
        /// Any failure is logged through <c>RunPage</c> and rethrown wrapped, with the list page URL in the message.
        /// </exception>
        private void GetPersonPageUrls(string localDir, string listPageUrl, List<Dictionary<string, string>> allPersonPageUrlInfos, string keyWords)
        {
            try
            {
                string savedPagePath = this.RunPage.GetFilePath(listPageUrl, localDir);
                HtmlAgilityPack.HtmlDocument listDocument = HtmlDocumentHelper.Load(savedPagePath);
                HtmlNodeCollection resultDivs = listDocument.DocumentNode.SelectNodes("//div[@class=\"result c-container \"]");

                foreach (HtmlNode resultDiv in resultDivs)
                {
                    // The displayed URL sits in div.f13, either directly under the result or one level deeper.
                    string shownUrl = HtmlDocumentHelper.TryGetNodeInnerText(resultDiv, "./div[@class=\"f13\"]/a", true, true, null, null)
                                      ?? HtmlDocumentHelper.TryGetNodeInnerText(resultDiv, "./div/div[@class=\"f13\"]/a", true, true, null, null);

                    string resultText = HtmlDocumentHelper.TryGetNodeInnerText(resultDiv, true, true, null, null);

                    bool isWantedProfile = shownUrl != null
                                           && shownUrl.Contains(".linkedin.com/in/")
                                           && resultText != null
                                           && resultText.ToLower().Contains(keyWords.ToLower());

                    if (!isWantedProfile)
                    {
                        continue;
                    }

                    try
                    {
                        string personName = HtmlDocumentHelper.TryGetNodeInnerText(resultDiv, "./h3/a", true, true, null, null);
                        string personUrl  = HtmlDocumentHelper.TryGetNodeAttributeValue(resultDiv, "./h3/a", "href", true, true, null, null);

                        // Strip each configured postfix from the link text.
                        foreach (string postfix in this.BaiduLinkedinItemPostfix)
                        {
                            personName = personName.Replace(postfix, "").Trim();
                        }

                        allPersonPageUrlInfos.Add(new Dictionary<string, string>
                        {
                            { "personUrl", personUrl },
                            { "personName", personName.Trim() }
                        });
                    }
                    catch (Exception ex)
                    {
                        throw new Exception("获取个人网页地址时出错", ex);
                    }
                }
            }
            catch (Exception ex)
            {
                this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
                throw new Exception("解析Baidu列表页出错, listPageUrl = +" + listPageUrl, ex);
            }
        }
Beispiel #18
0
        /// <summary>
        /// Loads the login page to obtain its cookies and request verification token, then posts
        /// <paramref name="initializeRequestBody"/> as JSON to the Initialize API with both attached.
        /// </summary>
        /// <param name="client">HTTP client used for both requests.</param>
        /// <param name="initializeRequestBody">Object wrapped in <c>JsonContent</c> as the POST body.</param>
        /// <returns>The response of the Initialize API call.</returns>
        private async Task<HttpResponseMessage> GetInitializeResponse(HttpClient client, object initializeRequestBody)
        {
            const string loginPath      = "/BankIdAuthentication/Login?returnUrl=%2F&loginOptions=X&orderRef=Y";
            const string initializePath = "/BankIdAuthentication/Api/Initialize";

            // Arrange csrf info: the login page supplies cookies and the verification token.
            var loginPage = await client.GetAsync(loginPath);

            var setCookies    = loginPage.Headers.GetValues("set-cookie");
            var loginDocument = await HtmlDocumentHelper.FromContent(loginPage.Content);
            var verification  = loginDocument.GetRequestVerificationToken();

            // Arrange acting request: replay cookies and token as headers on the JSON content.
            var body = new JsonContent(initializeRequestBody);

            body.Headers.Add("Cookie", setCookies);
            body.Headers.Add("RequestVerificationToken", verification);

            return await client.PostAsync(initializePath, body);
        }
Beispiel #19
0
        /// <summary>
        /// Splits HTML content into footer and body parts and stores them, together with the
        /// temp file path, in the instance fields.
        /// </summary>
        /// <param name="path">Stored in <c>_pathTempSaveFile</c>.</param>
        /// <param name="htmlContent">HTML to process.</param>
        /// <param name="idFooter">Element id of the footer; when null or empty the footer field is left unchanged.</param>
        /// <param name="idHeader">
        /// Not read by this method; <c>_headerContent</c> is always reset to null.
        /// NOTE(review): header extraction looks unimplemented — confirm.
        /// </param>
        private void _handleHeaderBodyFooter(string path, string htmlContent, string idFooter, string idHeader = null)
        {
            _pathTempSaveFile = path;

            var document = new HtmlDocumentHelper(htmlContent);

            _headerContent = null;

            // Apply the base URL to images only when one is configured.
            var hasBaseUrl = !string.IsNullOrEmpty(_url);
            if (hasBaseUrl)
            {
                document.MergeUrlOnImage(_url);
            }

            var hasFooterId = !string.IsNullOrEmpty(idFooter);
            if (hasFooterId)
            {
                _footerContent = document.GetContentById(idFooter);
            }

            // The removal is called even when idFooter is null or empty, as in the original.
            _bodyContent = document.RemoveContentById(idFooter);
        }
        /// <summary>
        /// Extracts the number from the pager summary text "共N条记录".
        /// </summary>
        /// <remarks>
        /// NOTE(review): the name says "page amount", but the matched text looks like a
        /// total record count — confirm how callers interpret it.
        /// </remarks>
        /// <param name="content">HTML of the page; may be <c>null</c>.</param>
        /// <returns>
        /// The parsed number, or -1 when <paramref name="content"/> is null, the pager's
        /// <c>em</c> element is missing, or its text does not match.
        /// </returns>
        private int ParseAmountPage(string content)
        {
            if (content != null)
            {
                var summary = HtmlDocumentHelper.CreateNavigator(content)
                                                .SelectSingleNode(@"//div[@class='jPage']/em");

                if (summary != null)
                {
                    var digits = Regex.Match(summary.Value, @"(?<=共)\d+(?=条记录)");

                    if (digits.Success)
                    {
                        return int.Parse(digits.Value);
                    }
                }
            }

            // Any missing piece falls through to the "unknown" marker.
            return -1;
        }
Beispiel #21
0
 /// <summary>
 /// Reads the locally saved copy of a Google result page and appends one
 /// { "personUrl", "personName" } dictionary for every result link that points
 /// at a ".linkedin.com/in/" profile.
 /// </summary>
 /// <param name="localDir">Directory holding the saved page files.</param>
 /// <param name="listPageUrl">URL of the list page; used to locate the local file.</param>
 /// <param name="allPersonPageUrlInfos">Accumulator that receives the extracted entries.</param>
 /// <exception cref="Exception">
 /// Any failure is logged through <c>RunPage</c> and rethrown wrapped, with the list page URL in the message.
 /// </exception>
 private void GetPersonPageUrls(string localDir, string listPageUrl, List <Dictionary <string, string> > allPersonPageUrlInfos)
 {
     try
     {
         string listPageLocalPath = this.RunPage.GetFilePath(listPageUrl, localDir);
         HtmlAgilityPack.HtmlDocument pageHtmlDoc = HtmlDocumentHelper.Load(listPageLocalPath);
         HtmlNodeCollection           allANodes   = pageHtmlDoc.DocumentNode.SelectNodes("//div[@class=\"rc\"]/h3/a");
         foreach (HtmlNode aNode in allANodes)
         {
             // Prefer data-href and fall back to href.
             string personUrl = HtmlDocumentHelper.TryGetNodeAttributeValue(aNode, "data-href", true, true, null, null);
             if (personUrl == null)
             {
                 personUrl = HtmlDocumentHelper.TryGetNodeAttributeValue(aNode, "href", true, true, null, null);
             }

             // Fix: the fallback lookup can also yield null. Skip such links instead of
             // letting a NullReferenceException abort the whole page.
             if (personUrl == null)
             {
                 continue;
             }

             if (personUrl.Contains(".linkedin.com/in/"))
             {
                 try
                 {
                     string personName = aNode.InnerText.Trim();

                     // Strip each configured postfix from the link text.
                     foreach (string postfix in this.GoogleLinkedinItemPostfix)
                     {
                         personName = personName.Replace(postfix, "");
                     }
                     Dictionary <string, string> personPageUrlInfo = new Dictionary <string, string>();
                     personUrl = CommonUtil.UrlDecode(personUrl);
                     personPageUrlInfo.Add("personUrl", personUrl);
                     personPageUrlInfo.Add("personName", personName.Trim());
                     allPersonPageUrlInfos.Add(personPageUrlInfo);
                 }
                 catch (Exception ex)
                 {
                     throw new Exception("获取个人网页地址时出错", ex);
                 }
             }
         }
     }
     catch (Exception ex)
     {
         this.RunPage.InvokeAppendLogText(ex.Message, LogLevelType.Error, true);
         throw new Exception("解析Google列表页出错, listPageUrl = +" + listPageUrl, ex);
     }
 }
        /// <summary>
        /// Extracts the review count for a product list item.
        /// </summary>
        /// <param name="item">Navigator positioned on the product's list item.</param>
        /// <returns>
        /// The digits preceding "人评价" in the item's text; otherwise the digits in parentheses
        /// inside <c>span.evaluate</c>; otherwise "-1".
        /// </returns>
        private static string ParseComments(XPathNavigator item)
        {
            // Primary form: "<digits> 人评价" anywhere in the item's text.
            var countMatch = Regex.Match(item.Value, @"\d+(?= *人评价)");

            if (!countMatch.Success)
            {
                // Fallback form "(<digits>)" in span.evaluate; the original author
                // notes that no live example of this had been found yet.
                var evaluateText = HtmlDocumentHelper.GetNodeValue(item, ".//span[@class='evaluate']");

                countMatch = Regex.Match(evaluateText, @"(?<=\()\d+(?=\))");
            }

            return countMatch.Success ? countMatch.Value : "-1";
        }
Beispiel #23
0
        /// <summary>
        /// Synchronizes the exam sessions (exam times): downloads the apply page and turns each
        /// <c>a</c> node that has an <c>attrval</c> attribute into a dictionary entry.
        /// </summary>
        /// <returns>
        /// The entries found; empty when the response has no content, the document cannot be
        /// loaded, or no nodes match <c>AppHelper.XPathTimes</c>.
        /// </returns>
        private List<DictionaryModel> SysTimes()
        {
            var entries = new List<DictionaryModel>();

            var request = new HttpClientOptions();

            request.URL              = AppHelper.UrlApplyPage;
            request.Method           = "GET";
            request.CookieCollection = CurrentCookies;

            var response = new HttpWebClientUtility().Request(request);

            if (VerifyHelper.IsEmpty(response.Content))
            {
                return entries;
            }

            var document = HtmlDocumentHelper.Load(response.Content);
            if (document == null)
            {
                return entries;
            }

            var candidates = HtmlDocumentHelper.FindChildNodes(document, AppHelper.XPathTimes);
            if (candidates == null)
            {
                return entries;
            }

            foreach (var anchor in candidates.Where(node => node.OriginalName == "a"))
            {
                var attrval = anchor.Attributes["attrval"];
                if (attrval == null)
                {
                    // Anchors without the value attribute are skipped.
                    continue;
                }

                entries.Add(new DictionaryModel()
                {
                    Id     = Guid.NewGuid(),
                    Genre  = AppConst.DictionaryTimes,
                    Name   = StringHelper.Get(anchor.InnerText),
                    Value  = StringHelper.Get(attrval.Value),
                    Parent = "",
                    Sort   = 0
                });
            }

            return entries;
        }
        /// <summary>
        /// Returns the content of a shop's search page.
        /// </summary>
        /// <param name="shopUrl">URL of the shop page; it is fetched first to read the shop's page-instance app id.</param>
        /// <returns>Whatever <c>GetWebContent</c> returns for the search URL built from that app id.</returns>
        private string GetSearchPageContent(string shopUrl)
        {
            // The shop page exposes its id as <input type="hidden" value="504028" id="pageInstance_appId"/>.
            var shopPage = this.GetWebContent(shopUrl);
            var appId    = HtmlDocumentHelper.GetNodeValue(shopPage, @"//input[@id='pageInstance_appId']/@value");

            // Search URL layout:
            //   view_search-{shop page id}-0-{sort type}-{sort direction}-{page size}-{page number}.html
            //   sort type:      5 = sales, 4 = price, 3 = favourites, 2 = time
            //   sort direction: 1 = descending, 0 = ascending
            //   page size:      24 at most
            //   page number:    starts at 1
            // The shop page id is the value of pageInstance_appId.
            // Example: http://mall.jd.com//view_search-337310-0-5-0-24-5.html
            return this.GetWebContent($"http://mall.jd.com/view_search-{appId}-0-5-0-24-1.html");
        }
Beispiel #25
0
        /// <summary>
        /// Requests the page at <c>LinkAmazon</c> and, unless it contains "Currently unavailable", e-mails the
        /// text of its <c>centerCol</c> div. Unavailability and any exception are appended to the log file.
        /// </summary>
        public async Task ParseAmazon()
        {
            const string logFile = @"D:\playstationLog.log";

            try
            {
                var requester = new RequestHelper();
                var pageHtml  = await requester.SendRequestAsync(LinkAmazon, headers : HeaderBuilder.GetDefaultHeaders(), automaticDecompression : true);

                if (pageHtml.Contains("Currently unavailable"))
                {
                    File.AppendAllText(logFile, $"{DateTime.Now}: Not available in Amazon{Environment.NewLine}");
                    return;
                }

                var page = new HtmlDocument();
                page.LoadHtml(pageHtml);

                // Line breaks are flattened to spaces before the text is mailed.
                var centerColumn = HtmlDocumentHelper.GetNodeByParams(page.DocumentNode, HtmlTag.div, HtmlAttribute.id, "centerCol");
                _emailHelper.SendEmail(centerColumn.InnerText.Replace("\n", " "));
            }
            catch (Exception error)
            {
                File.AppendAllText(logFile, $"{DateTime.Now}: {error}{Environment.NewLine}");
            }
        }
Beispiel #26
0
        /// <summary>
        /// Walks every page of a lot listing in the browser driver, collects the lots shown on each page and
        /// completes them with bidding data requested from <c>JsonUrl</c>.
        /// </summary>
        /// <param name="link">Listing URL to scrape; when null or empty the class-level <c>Link</c> is used.</param>
        /// <returns>
        /// The lots for which bidding data could be resolved. The scrape is best-effort: a lot that cannot be
        /// parsed or matched is skipped, and a failure while loading a page ends the scrape and returns what
        /// was collected up to that point.
        /// </returns>
        public static async Task <List <DataModelBase> > Start(string link)
        {
            var result = new List <DataModelBase>();

            // Resolve the listing once so the page loop requests the same listing whose page count was
            // read. The loop previously always paged through Link, even when another link was supplied.
            var listingUrl = string.IsNullOrEmpty(link) ? Link : link;

            try
            {
                Driver.Navigate().GoToUrl(listingUrl);
                var doc = new HtmlDocument();
                doc.LoadHtml(Driver.PageSource);

                // The text of the pagination nav's fifth child node is taken as the page count.
                var pagination = HtmlDocumentHelper.GetNodeByParams(doc.DocumentNode, "nav", "class", "be-pagination u-margin-t-large");
                var pageNumber = Convert.ToInt32(pagination.ChildNodes[4].InnerText);

                // iterate through every page
                for (var i = 1; i <= pageNumber; i++)
                {
                    var ids         = new List <string>();    // lot ids found on this page
                    var listPerPage = new List <DataModel>(); // lots found on this page
                    Driver.Navigate().GoToUrl(listingUrl + $"?page={i}");
                    doc.LoadHtml(Driver.PageSource);
                    var lots = HtmlDocumentHelper.GetNodeByParams(doc.DocumentNode, "div", "class", "be-lot-list gallery");
                    foreach (var lot in lots.ChildNodes)
                    {
                        try
                        {
                            var name = HtmlDocumentHelper.GetNodeByParams(lot, "h2", "class", "c-card__title be-lot__title").InnerText;
                            var url  = HtmlDocumentHelper.GetNodeByParams(lot, "a", "class", "c-card").GetAttributeValue("href", null);
                            var id   = new Uri(url).Segments[2].Split('-')[0];

                            // CurrentBid carries the numeric lot id here; it is the key matched against the JSON entries below.
                            listPerPage.Add(new DataModel {
                                Name = name, Url = url, CurrentBid = int.Parse(id)
                            });
                            ids.Add(id);
                        }
                        catch
                        {
                            // Best-effort: child nodes that do not yield a name, URL and numeric id are skipped.
                        }
                    }

                    var idList   = string.Join(",", ids);
                    var helper   = new RequestHelper();
                    var response = await helper.SendRequestAsync($"{JsonUrl}?ids={idList}", automaticDecompression : true, headers : HeaderBuilder.GetDefaultHeaders());

                    var list = JsonConvert.DeserializeObject <JsonResult>(response);

                    foreach (var item in listPerPage)
                    {
                        try
                        {
                            var correspondentItem = list.Lots.FirstOrDefault(x => x.id == item.CurrentBid);
                            item.BiddingEndTime = correspondentItem.bidding_end_time;
                            var amount = correspondentItem.current_bid_amount;
                            item.CurrentBidAmount = amount.EUR;
                            item.ReservedPrice    = correspondentItem.reserve_price_met.HasValue && correspondentItem.reserve_price_met.Value ? "Has reserve price" : "No reserve price";
                            result.Add(item);
                        }
                        catch
                        {
                            // Best-effort: a lot with no matching JSON entry (or an incomplete one) is left out of the result.
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Best-effort: any navigation, parsing or request failure ends the scrape; the lots
                // gathered so far are still returned. The failure is surfaced to an attached debugger only.
                System.Diagnostics.Debug.WriteLine(e);
            }

            return result;
        }
        /// <summary>
        /// Verifies that posting to the Cancel endpoint responds with 200 OK and reaches the BankID API's cancel call.
        /// </summary>
        public async Task Cancel_Calls_CancelApi()
        {
            // Arrange: protector mock that always unprotects to fixed options, plus an API wrapper that records the cancel call
            var options       = new BankIdLoginOptions(new List <string>(), null, false, true, false, false, string.Empty, DefaultStateCookieName);
            var protectorMock = new Mock <IBankIdLoginOptionsProtector>();

            protectorMock
            .Setup(p => p.Unprotect(It.IsAny <string>()))
            .Returns(options);
            var recordingApi = new TestBankIdApi(new BankIdSimulatedApiClient());

            using var server = CreateServer(
                      builder =>
            {
                builder.UseSimulatedEnvironment().AddSameDevice();
                builder.AuthenticationBuilder.Services.AddTransient <IBankIdLauncher, TestBankIdLauncher>();
            },
                      DefaultAppConfiguration(async context =>
            {
                await context.ChallengeAsync(BankIdDefaults.SameDeviceAuthenticationScheme);
            }),
                      services =>
            {
                services.AddTransient(s => protectorMock.Object);
                services.AddSingleton <IBankIdApiClient>(s => recordingApi);
            });

            // Arrange: load the login page to obtain the cookies and the anti-forgery token
            var loginResponse = await CreateRequestWithStateCookie(server, "/BankIdAuthentication/Login?returnUrl=%2F&loginOptions=X&orderRef=Y").GetAsync();
            var cookies       = loginResponse.Headers.GetValues("set-cookie");
            var loginPage     = await HtmlDocumentHelper.FromContent(loginResponse.Content);
            var antiForgery   = loginPage.GetRequestVerificationToken();

            // Arrange: start a transaction so there is an order reference to cancel
            var protectedOptions = "TestOptions";
            var initializeBody   = new JsonContent(new { returnUrl = "/TestReturnUrl", loginOptions = protectedOptions });

            initializeBody.Headers.Add("Cookie", cookies);
            initializeBody.Headers.Add("RequestVerificationToken", antiForgery);

            var client             = server.CreateClient();
            var initializeResponse = await client.PostAsync("/BankIdAuthentication/Api/Initialize", initializeBody);
            var initializeJson     = await initializeResponse.Content.ReadAsStringAsync();
            var initialized        = JsonConvert.DeserializeAnonymousType(initializeJson, new { RedirectUri = "", OrderRef = "", IsAutoLaunch = false });

            var cancelBody = new JsonContent(new
            {
                orderRef        = initialized.OrderRef,
                loginOptions    = protectedOptions,
                cancelReturnUrl = "/"
            });

            cancelBody.Headers.Add("Cookie", cookies);
            cancelBody.Headers.Add("RequestVerificationToken", antiForgery);

            // Act
            var cancelResponse = await client.PostAsync("/BankIdAuthentication/Api/Cancel", cancelBody);

            // Assert
            Assert.Equal(HttpStatusCode.OK, cancelResponse.StatusCode);
            Assert.True(recordingApi.CancelAsyncIsCalled);
        }
Beispiel #28
0
        /// <summary>
        /// Scrapes the platform list from <c>PageUrl</c>, then visits each platform page to read its summary
        /// statistics and its item price list, and finally hands the collected models to <c>SaveToJsonFile</c>.
        /// </summary>
        /// <remarks>
        /// Scraping is best-effort: exceptions raised while reading a platform link, a platform page or a single
        /// item row are swallowed, so the affected entry is skipped or left partially filled.
        /// </remarks>
        private static async Task Parse()
        {
            var requestHelper = new RequestHelper();
            // Custom headers, used for the per-platform requests further down.
            var header        = HeaderBuilder.BuildOwnHeaders(new HeaderModel()
            {
                Accept     = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                Host       = "gamevaluenow.com",
                Referer    = "https://gamevaluenow.com/",
                User_Agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"
            });
            // NOTE(review): the landing page is requested with the default headers, not with `header` - confirm this is intended.
            var html = await requestHelper.SendRequestAsync(PageUrl, headers : HeaderBuilder.GetDefaultHeaders());

            var doc = new HtmlDocument();

            doc.LoadHtml(html);
            var result = new List <GameValueNowModel>();
            // Platform links: <a> elements selected by the "brand-" class value (presumably a prefix match, judging by the helper's name).
            var nodes  = HtmlDocumentHelper.GetNodesByParamsUseXpathStartsWith(doc.DocumentNode, HtmlTag.a, HtmlAttribute._class, "brand-");

            // Pass 1: build one model per distinct platform name.
            foreach (var node in nodes)
            {
                try
                {
                    var href = node.GetAttributeValue("href", null);
                    // Anchors without a target, or with a "#" placeholder, are not platform pages.
                    if (href is null || href == "#")
                    {
                        continue;
                    }
                    var model = new GameValueNowModel();
                    var url   = $"{PageUrl}{href}";
                    var name  = node.InnerText;
                    model.URL          = url;
                    model.PlatformName = WebUtility.HtmlDecode(name);
                    // Keep only the first link seen for each platform name.
                    if (result.Select(x => x.PlatformName).Contains(model.PlatformName))
                    {
                        continue;
                    }
                    result.Add(model);
                }
                catch
                {
                    // Best-effort: a link that cannot be read is skipped.
                }
            }

            // Pass 2: load every platform page and fill in its statistics and items.
            foreach (var item in result)
            {
                try
                {
                    item.Data = new List <Data>();
                    var document = new HtmlDocument();

                    var dataHtml = await requestHelper.SendRequestAsync(item.URL, headers : header);

                    document.LoadHtml(dataHtml);

                    // Statistics block: each stat is located by a label fragment contained in its text.
                    var statsNode    = HtmlDocumentHelper.GetNodeByParams(document.DocumentNode, HtmlTag.div, HtmlAttribute.id, "stats");
                    var statListNode = HtmlDocumentHelper.GetNodesByParams(statsNode, HtmlTag.div, HtmlAttribute._class, "col-100 stat");

                    var avgLoosePrice    = statListNode.FirstOrDefault(x => x.InnerText.Contains("Avg Loose"));
                    var avgCompletePrice = statListNode.FirstOrDefault(x => x.InnerText.Contains("Avg Complete"));
                    var looseSetValue    = statListNode.FirstOrDefault(x => x.InnerText.Contains("Loose Set"));
                    var completeSetValue = statListNode.FirstOrDefault(x => x.InnerText.Contains("Complete Set"));
                    var sharpOfGames     = statListNode.FirstOrDefault(x => x.InnerText.Contains("#"));

                    // A stat is only assigned when its node was found; the value is the trimmed text of the "stat-value" div.
                    if (avgLoosePrice != null)
                    {
                        item.AvgLoosePrice = HtmlDocumentHelper.GetNodeByParamsUseXpathContains(avgLoosePrice, HtmlTag.div, HtmlAttribute._class, "stat-value")?.InnerText?.Trim();
                    }
                    if (avgCompletePrice != null)
                    {
                        item.AvgCompletePrice = HtmlDocumentHelper.GetNodeByParamsUseXpathContains(avgCompletePrice, HtmlTag.div, HtmlAttribute._class, "stat-value")?.InnerText?.Trim();
                    }
                    if (looseSetValue != null)
                    {
                        item.LooseSetValue = HtmlDocumentHelper.GetNodeByParamsUseXpathContains(looseSetValue, HtmlTag.div, HtmlAttribute._class, "stat-value")?.InnerText?.Trim();
                    }
                    if (completeSetValue != null)
                    {
                        item.CompleteSetValue = HtmlDocumentHelper.GetNodeByParamsUseXpathContains(completeSetValue, HtmlTag.div, HtmlAttribute._class, "stat-value")?.InnerText?.Trim();
                    }
                    if (sharpOfGames != null)
                    {
                        item.SharpOfGames = HtmlDocumentHelper.GetNodeByParamsUseXpathContains(sharpOfGames, HtmlTag.div, HtmlAttribute._class, "stat-value")?.InnerText?.Trim();
                    }

                    // Item list: one Data entry per row found under the "item-list" div.
                    var listNode =
                        HtmlDocumentHelper.GetNodeByParams(document.DocumentNode, HtmlTag.div, HtmlAttribute.id, "item-list");
                    var collectionItemNodes = HtmlDocumentHelper.GetNodesByParamsUseXpathStartsWith(listNode, HtmlTag.div,
                                                                                                    HtmlAttribute._class, "item-row desktop all");
                    foreach (var collectionItemNode in collectionItemNodes)
                    {
                        try
                        {
                            var data = new Data();
                            var name = HtmlDocumentHelper
                                       .GetNodeByParams(collectionItemNode, HtmlTag.a, HtmlAttribute._class, "game-link").InnerText;
                            // The item number is stored with all line breaks and spaces removed.
                            var id = HtmlDocumentHelper
                                     .GetNodeByParams(collectionItemNode, HtmlTag.div, HtmlAttribute._class, "item-number").InnerText
                                     .Replace("\n", "").Replace(" ", "");
                            data.Title = name;
                            data.Id    = id;
                            var priceContainer = HtmlDocumentHelper.GetNodeByParamsUseXpathContains(collectionItemNode, HtmlTag.div, HtmlAttribute._class, "price-col-container");
                            // Rows without a price container or price links are not recorded.
                            if (priceContainer is null)
                            {
                                continue;
                            }
                            var prices = HtmlDocumentHelper.GetNodesByParamsUseXpathStartsWith(priceContainer, HtmlTag.a, HtmlAttribute._class, "game-link");
                            if (prices is null)
                            {
                                continue;
                            }
                            // Price links are positional: 0 = loose, 1 = complete, 2 = new, 3 = graded.
                            var priceCount = prices.Count;
                            if (priceCount > 0)
                            {
                                data.Loose = prices[0].InnerText;
                            }
                            if (priceCount > 1)
                            {
                                data.Complete = prices[1].InnerText;
                            }
                            if (priceCount > 2)
                            {
                                data.New = prices[2].InnerText;
                            }
                            if (priceCount > 3)
                            {
                                data.Graded = prices[3].InnerText;
                            }
                            data.PlatformName = item.PlatformName;
                            item.Data.Add(data);
                        }
                        catch
                        {
                            // Best-effort: a row that cannot be read is skipped.
                        }
                    }
                }
                catch (Exception e)
                {
                    // Best-effort: a platform page that fails keeps whatever was filled in before the failure.
                }
                // Pause for 500 ms after each platform, whether or not it succeeded.
                await Task.Delay(500);
            }

            await SaveToJsonFile(result);

            // SQL persistence is currently disabled:
            //await SaveToSql(result);
        }
        /// <summary>
        /// Reads the locally stored detail page of every row in <paramref name="listSheet"/> and writes one
        /// result row per shop: identity columns copied from the sheet plus service time, address,
        /// coordinates, phone and service items parsed from the page.
        /// </summary>
        /// <param name="listSheet">Rows describing the detail pages; rows whose give-up column equals "Y" are skipped.</param>
        /// <param name="pageSourceDir">Directory of the downloaded page sources, passed to <c>RunPage.GetFilePath</c>.</param>
        /// <param name="resultEW">Writer that receives one row per processed shop.</param>
        /// <remarks>
        /// Every page element is optional: when one is missing or malformed the corresponding column keeps
        /// its default ("" for text, null for coordinates) instead of aborting the whole export.
        /// </remarks>
        private void GetShopDetail(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
        {
            for (int i = 0; i < listSheet.RowCount; i++)
            {
                // Original author's note: listSheet holds only a single record.
                Dictionary <string, string> row = listSheet.GetRow(i);
                if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                string             pageUrl      = row[SysConfig.DetailPageUrlFieldName];
                string             provinceName = row["provinceName"];
                string             cityCode     = row["cityCode"];
                string             cityName     = row["cityName"];
                string             shopCode     = row["shopCode"];
                string             shopName     = row["shopName"];
                string             serviceTime  = "";
                string             tel          = "";
                string             address      = "";
                Nullable <decimal> lng          = null;
                Nullable <decimal> lat          = null;
                string             serviceItems = "";

                // NOTE(review): the returned path was never used; the call is kept in case GetFilePath has side effects.
                this.RunPage.GetFilePath(pageUrl, pageSourceDir);
                HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

                // Service time: the <dd> that follows the <dt> whose text starts with "服务时间".
                // Address and phone are not taken from this list; they come from the keywords meta
                // tag and the hidden phone input further down.
                HtmlNodeCollection allInfoNameNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"shopInfo\"]/dl/dt");
                if (allInfoNameNodes != null)
                {
                    foreach (HtmlNode infoNameNode in allInfoNameNodes)
                    {
                        if (!infoNameNode.InnerText.StartsWith("服务时间"))
                        {
                            continue;
                        }

                        HtmlNode infoNode = HtmlDocumentHelper.GetNextNode(infoNameNode, "dd");
                        if (infoNode != null)
                        {
                            serviceTime = infoNode.InnerText;
                        }
                    }
                }

                // Coordinates: taken from the first script that declares `var lng = "..."`.
                HtmlNodeCollection allScriptNodes = htmlDoc.DocumentNode.SelectNodes("//script");
                if (allScriptNodes != null)
                {
                    foreach (HtmlNode scriptNode in allScriptNodes)
                    {
                        string script = scriptNode.InnerText;
                        if (script.Contains("var lng = \""))
                        {
                            lng = ReadQuotedScriptDecimal(script, "var lng = \"");
                            lat = ReadQuotedScriptDecimal(script, "var lat = \"");
                            break;
                        }
                    }
                }

                // Phone: value of the hidden input, when both the input and its attribute exist.
                HtmlNode telNode = htmlDoc.DocumentNode.SelectSingleNode("//input[@id=\"HiddenStrPhone\"]");
                if (telNode != null && telNode.Attributes["value"] != null)
                {
                    tel = telNode.Attributes["value"].Value;
                }

                // Address: second comma-separated entry of the keywords meta tag, when present.
                HtmlNode keyWordNode = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='keywords']");
                if (keyWordNode != null && keyWordNode.Attributes["content"] != null)
                {
                    string[] kws = keyWordNode.Attributes["content"].Value.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
                    if (kws.Length > 1)
                    {
                        address = kws[1];
                    }
                }

                // Service items: the cells of every table row are concatenated; a cell ending in ":" is
                // treated as a group label, every other cell is terminated with ";".
                HtmlNodeCollection allServiceGroupNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"shopProject\"]/table/tbody/tr");
                if (allServiceGroupNodes != null)
                {
                    StringBuilder serviceItemSB = new StringBuilder();
                    foreach (HtmlNode serviceGroupNode in allServiceGroupNodes)
                    {
                        // SelectNodes yields null (not an empty collection) for a row without <td> cells.
                        HtmlNodeCollection allServiceNodes = serviceGroupNode.SelectNodes("./td");
                        if (allServiceNodes == null)
                        {
                            continue;
                        }

                        foreach (HtmlNode serviceNode in allServiceNodes)
                        {
                            string serviceText = serviceNode.InnerText.Trim();
                            serviceItemSB.Append(serviceText);
                            if (!serviceText.EndsWith(":"))
                            {
                                serviceItemSB.Append(";");
                            }
                        }
                    }
                    serviceItems = serviceItemSB.ToString();
                }

                Dictionary <string, object> f2vs = new Dictionary <string, object>
                {
                    { "provinceName", provinceName },
                    { "cityCode", cityCode },
                    { "cityName", cityName },
                    { "shopCode", shopCode },
                    { "shopName", shopName },
                    { "serviceTime", serviceTime },
                    { "address", address },
                    { "lng", lng },
                    { "lat", lat },
                    { "tel", tel },
                    { "serviceItems", serviceItems }
                };
                resultEW.AddRow(f2vs);
            }
        }

        /// <summary>
        /// Reads the decimal that follows <paramref name="prefix"/> in <paramref name="script"/>, up to the
        /// closing <c>";</c>. The number is parsed with the invariant culture so that "." is always the
        /// decimal separator, regardless of the machine's regional settings.
        /// </summary>
        /// <param name="script">Script text to search.</param>
        /// <param name="prefix">Declaration prefix including the opening quote, e.g. <c>var lng = "</c>.</param>
        /// <returns>The parsed value, or null when the prefix, the terminator or a valid number is missing.</returns>
        private static Nullable <decimal> ReadQuotedScriptDecimal(string script, string prefix)
        {
            int beginIndex = script.IndexOf(prefix, StringComparison.Ordinal);
            if (beginIndex < 0)
            {
                return null;
            }
            beginIndex += prefix.Length;

            int endIndex = script.IndexOf("\";", beginIndex, StringComparison.Ordinal);
            if (endIndex < 0)
            {
                return null;
            }

            decimal value;
            if (decimal.TryParse(script.Substring(beginIndex, endIndex - beginIndex),
                                 System.Globalization.NumberStyles.Number,
                                 System.Globalization.CultureInfo.InvariantCulture,
                                 out value))
            {
                return value;
            }

            return null;
        }
Beispiel #30
0
        private static async Task Parse()
        {
            var html = await GetData(PageUrl);

            var doc = new HtmlDocument();

            doc.LoadHtml(html);
            var result = new List <PriceChartingModel>();
            var brands = HtmlDocumentHelper.GetNodesByParams(doc.DocumentNode, HtmlTag.li, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "brand").Where(x => x.GetAttributeValue("id", null) != null && x.GetAttributeValue("id", null) != "tools");

            foreach (var brand in brands)
            {
                try
                {
                    var dropDownMenu = HtmlDocumentHelper.GetNodeByParamsUseXpathStartsWith(brand, HtmlTag.ul, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "menu-dropdown");
                    if (dropDownMenu != null)
                    {
                        var regions = HtmlDocumentHelper.GetNodesByParams(dropDownMenu, HtmlTag.ul, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "region");
                        if (regions != null)
                        {
                            foreach (var region in regions)
                            {
                                var regionName = HtmlDocumentHelper.GetNodeByParams(region, HtmlTag.li, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "title")?.InnerText;
                                var categories = region.SelectNodes(".//li").Where(x => x.InnerHtml != string.Empty);
                                foreach (var category in categories)
                                {
                                    if (category.ChildNodes[0] == null || category.ChildNodes[0].GetAttributeValue("href", null) == null)
                                    {
                                        continue;
                                    }
                                    var url  = category.ChildNodes[0].GetAttributeValue("href", null);
                                    var name = category.ChildNodes[0].InnerText;
                                    result.Add(new PriceChartingModel
                                    {
                                        Region       = regionName,
                                        CategoryName = name,
                                        URL          = url
                                    });
                                }
                            }
                        }
                        else
                        {
                            var categories = brand.SelectNodes(".//li").Where(x => x.InnerHtml != string.Empty);
                            foreach (var category in categories)
                            {
                                if (category.ChildNodes[0]?.GetAttributeValue("href", null) == null)
                                {
                                    continue;
                                }
                                var url  = category.ChildNodes[0].GetAttributeValue("href", null);
                                var name = category.ChildNodes[0].InnerText;
                                result.Add(new PriceChartingModel
                                {
                                    Region       = "NTSC (USA)",
                                    CategoryName = name,
                                    URL          = url
                                });
                            }
                        }
                    }
                }
                catch { }
            }
            foreach (var category in result)
            {
                try
                {
                    var categoryData = new List <Data>();
                    category.Data = new List <Data>();
                    var response  = new List <Product>();
                    var pageCount = 0;


                    while (true)
                    {
                        try
                        {
                            var jUrl     = PageUrlJson.Replace("{category}", category.URL.Split('/').Last()).Replace("{count}", pageCount.ToString());
                            var dataJson = await GetData(jUrl);

                            var jsonObj = JsonConvert.DeserializeObject <Response>(dataJson);
                            response.AddRange(jsonObj.products);
                            if (jsonObj.products.Length == 50)
                            {
                                pageCount += 50;
                                Thread.Sleep(100);
                                continue;
                            }
                            break;
                        }
                        catch
                        {
                        }
                    }

                    foreach (var item in response)
                    {
                        categoryData.Add(new Data
                        {
                            Title      = item.productName,
                            LoosePrice = item.price1,
                            CIBPrice   = item.price3,
                            NewPrice   = item.price2
                        });
                    }
                    category.Data = categoryData;
                }
                catch (Exception ex)
                {
                }
                // details
                foreach (var data in category.Data)
                {
                    try
                    {
                        var url = $"{category.URL.Replace("console", "game")}/{ExtensionMethods.Replace(data.Title, new char[] { '[', ']', '(', ')', '/', '\\', '.', ':', ',', '?' }, " ").Replace(' ', '-')}";

                        Thread.Sleep(200);
                        html = await GetData(url);

                        doc = new HtmlDocument();
                        doc.LoadHtml(html);
                        var attributeNode = HtmlDocumentHelper.GetNodeByParams(doc.DocumentNode, HtmlTag.table, Scrapping.AllPossibilities.Enums.HtmlAttribute.id, "attribute");
                        if (attributeNode != null)
                        {
                            var detailNodes     = attributeNode.SelectNodes(".//tr");
                            var genre           = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "Genre:");
                            var releaseDate     = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "Release Date:");
                            var rating          = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "ESRB Rating:");
                            var publisher       = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "Publisher:");
                            var developer       = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "Developer:");
                            var playerCount     = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "Player Count:");
                            var upc             = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "UPC:");
                            var asin            = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "ASIN (Amazon):");
                            var epid            = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "ePID (eBay):");
                            var priceChartingId = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == "PriceCharting ID:");

                            if (genre != null)
                            {
                                data.Genre = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(genre, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                            if (releaseDate != null)
                            {
                                data.ReleaseDate = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(releaseDate, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                            if (rating != null)
                            {
                                data.Rating = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(rating, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                            if (publisher != null)
                            {
                                data.Publisher = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(publisher, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                            if (developer != null)
                            {
                                data.Developer = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(developer, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                            if (playerCount != null)
                            {
                                data.PlayerCount = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(playerCount, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                            if (upc != null)
                            {
                                data.UPC = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(upc, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                            if (asin != null)
                            {
                                data.Amazon_ASIN = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(asin, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                            if (epid != null)
                            {
                                data.Ebay_ePID = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(epid, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                            if (priceChartingId != null)
                            {
                                data.PriceChartingId = WebUtility.HtmlDecode(HtmlDocumentHelper.GetNodeByParams(priceChartingId, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details")?.InnerText?.Trim(' ', '\n'));
                            }
                        }
                    }
                    catch
                    {
                    }
                }
                await WriteJson(JsonConvert.SerializeObject(category, Formatting.Indented), category.CategoryName);

                await Task.Delay(500);
            }
            await WriteJson(JsonConvert.SerializeObject(result, Formatting.Indented));
        }