예제 #1
0
        /// <summary>
        /// Parses a forum lobby (thread listing) page: reads the server time from the page footer
        /// and appends every thread row that converts successfully to <paramref name="threadList"/>.
        /// </summary>
        /// <param name="url">Page URL; handed to <c>VBulletinForum.ThreadIdFromUrl</c>.</param>
        /// <param name="doc">Raw HTML of the page.</param>
        /// <param name="serverTime">
        /// Time parsed from the last <c>div#footer_time</c> element, or the local clock when the
        /// page has no such element.
        /// </param>
        /// <param name="threadList">List that receives the parsed threads; it is only appended to.</param>
        protected override void ParseLobbyPage(string url, string doc, out DateTimeOffset serverTime, ref List <ForumThread> threadList)
        {
            Int32 threadId = VBulletinForum.ThreadIdFromUrl(url);
            var   html     = new HtmlAgilityPack.HtmlDocument();

            html.LoadHtml(doc);
            HtmlAgilityPack.HtmlNode root = html.DocumentNode;

            // Fallback used when the footer carries no time stamp.
            serverTime = DateTime.Now;

            // SelectNodes yields null (not an empty collection) when nothing matches, so the
            // collection must be checked before asking for its last element.
            HtmlAgilityPack.HtmlNodeCollection timeNodes = root.SelectNodes("//div[@id='footer_time']");
            HtmlAgilityPack.HtmlNode           timeNode  = (timeNodes == null) ? null : timeNodes.LastOrDefault();
            if (timeNode != null)
            {
                String timeText = timeNode.InnerText;
                serverTime = Utils.Misc.ParsePageTime(timeText, DateTime.UtcNow);
            }

            HtmlAgilityPack.HtmlNodeCollection threads = root.SelectNodes("//tbody[contains(@id, 'threadbits_forum_')]/tr");
            if (threads == null)
            {
                // No thread rows on this page.
                return;
            }
            foreach (HtmlAgilityPack.HtmlNode thread in threads)
            {
                ForumThread t = HtmlToThread(threadId, thread, serverTime);
                if (t != null)
                {
                    threadList.Add(t);
                }
            }
        }
        /// <summary>
        /// Downloads a dianping.com search result page with a fixed set of request headers and
        /// writes one "name---review count---price" line per listing to the HTTP response.
        /// Listings that lack the expected title/comment markup are skipped.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            string heads             = @"Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
                            Accept-Encoding:gzip, deflate
                            Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
                            Cache-Control:max-age=0
                            Connection:keep-alive
                            Cookie:showNav=#nav-tab|0|0; navCtgScroll=0; cy=1; cye=shanghai; _lxsdk_cuid=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _lxsdk_s=1693813fded-ea2-2e7-d89%7C%7C51; _lxsdk=1693813fdecc8-062bf66f365a768-11666e4a-384000-1693813fdecc8; _hc.v=7216e9e3-be12-eff4-1836-49d9b0c4b0ce.1551424029; s_ViewType=10
                            Host:www.dianping.com
                            Upgrade-Insecure-Requests:1                           
                            User-Agent:Mozilla/5.0 (Windows NT 10.0; WOW64; rv:65.0) Gecko/20100101 Firefox/65.0";
            string url               = "http://www.dianping.com/search/keyword/1/0_%E8%8B%B1%E8%AF%AD%E5%9F%B9%E8%AE%AD/r842";
            ClassHttpRequestClient s = new ClassHttpRequestClient(true);
            string content           = "";
            string response          = s.httpPost(url, heads, content, Encoding.UTF8);

            // Step 1: create the HtmlAgilityPack document.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            // Step 2: load the downloaded HTML (an empty document when the request returned nothing).
            doc.LoadHtml(response ?? string.Empty);

            // SelectNodes returns null when nothing matches, e.g. on an error or captcha page.
            HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
            StringBuilder sb = new StringBuilder();

            if (collection != null)
            {
                foreach (HtmlAgilityPack.HtmlNode item in collection)
                {
                    HtmlAgilityPack.HtmlNode divtit     = item.SelectSingleNode("div[@class=\"tit\"]");
                    HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
                    if (divtit == null || divcomment == null)
                    {
                        continue;
                    }

                    HtmlAgilityPack.HtmlNode aname  = divtit.SelectSingleNode("a[1]");     // first link under the title div
                    HtmlAgilityPack.HtmlNode anum   = divcomment.SelectSingleNode("a[1]"); // first link under the comment div
                    HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]"); // second link under the comment div
                    if (aname == null || anum == null || aprice == null)
                    {
                        continue;
                    }

                    sb.AppendFormat("{0}---{1}---{2}</br>", aname.InnerText, anum.InnerText, aprice.InnerText);
                }
            }
            Response.Write(sb);
        }
예제 #3
0
        /// <summary>
        /// Reads the title and stop rows of a route table and stores the names under
        /// <paramref name="langName"/> on <paramref name="line"/>. Stops already present in
        /// <c>line.Stops</c> (keyed by position) only get their name added for this language;
        /// unknown positions get a new <c>Stop</c> whose <c>PlaceID</c> comes from the row's link.
        /// </summary>
        /// <param name="line">Line whose From/To/Name/Stops entries are filled in.</param>
        /// <param name="routeNode">Node containing the route table markup.</param>
        /// <param name="langName">Language key used for all name dictionaries.</param>
        /// <exception cref="ApplicationException">
        /// The title cell is missing or too short, a stop link is missing or is not a valid URI,
        /// or a new stop's link carries no place id.
        /// </exception>
        private static void ParseStopsTable(OneWayLine line, HtmlAgilityPack.HtmlNode routeNode, string langName)
        {
            int stopCounter = 0;

            HtmlAgilityPack.HtmlNode titleNode = routeNode.SelectSingleNode("tr[1]/td[1]/table[1]/tr[1]/td[1]");
            // Child 0 holds the origin and child 4 the destination, so five children are required.
            if (titleNode == null || titleNode.ChildNodes.Count < 5)
            {
                throw new ApplicationException("Route title cell not found or has an unexpected layout.");
            }

            line.From[langName] = titleNode.ChildNodes[0].InnerText.Trim();
            line.To[langName]   = titleNode.ChildNodes[4].InnerText.Trim();
            line.Name[langName] = String.Format("{0} - {1}", line.From[langName], line.To[langName]);

            HtmlAgilityPack.HtmlNodeCollection rows = routeNode.SelectNodes("tr[1]/td[1]/table[1]/tr[@class='SmallTableRow ']");

            if (rows == null)
            {
                // Diagnostic output: dump the markup that contained no stop rows.
                Console.WriteLine(routeNode.InnerHtml);
                Console.WriteLine("null collection");
                return;
            }

            foreach (HtmlAgilityPack.HtmlNode stopRowNode in rows)
            {
                string stopName = stopRowNode.ChildNodes[1].InnerHtml.Trim();

                HtmlAgilityPack.HtmlNode      linkNode = stopRowNode.ChildNodes[7].ChildNodes[1];
                HtmlAgilityPack.HtmlAttribute href     = linkNode.Attributes["href"];
                if (href == null)
                {
                    throw new ApplicationException(String.Format("Stop '{0}' has no link.", stopName));
                }
                string link = href.Value;

                // TryCreate leaves linkUri null on failure; check it instead of dereferencing blindly.
                Uri linkUri;
                if (!Uri.TryCreate(BusCoIlParser.baseUri, link, out linkUri))
                {
                    throw new ApplicationException(String.Format("Stop link '{0}' is not a valid URI.", link));
                }
                NameValueCollection col = System.Web.HttpUtility.ParseQueryString(linkUri.Query);

                if (line.Stops.ContainsKey(stopCounter))
                {
                    // Stop known from an earlier pass: just add this language's name.
                    line.Stops[stopCounter].Name[langName] = stopName;
                    stopCounter++;
                }
                else
                {
                    Stop s = new Stop()
                    {
                        PlaceID = col["PlaceID"] ?? col["PlaceID1"]
                    };
                    s.Name[langName] = stopName;

                    if (s.PlaceID == null)
                    {
                        throw new ApplicationException(String.Format("Stop link '{0}' carries no PlaceID.", link));
                    }

                    line.Stops.Add(stopCounter++, s);
                }
            }
        }
예제 #4
0
        } // End Function GetProxyArray

        /// <summary>
        /// Converts the proxy table of free-proxy-list.net into an indented JSON file.
        /// The page is read from <paramref name="htmlFile"/> when that file exists; otherwise it
        /// is downloaded and saved to <paramref name="htmlFile"/> first.
        /// </summary>
        /// <param name="htmlFile">Path of the cached HTML page (read, or created after download).</param>
        /// <param name="jsonFile">Path of the JSON file to write.</param>
        /// <exception cref="System.InvalidOperationException">
        /// The HTML does not contain a table with id <c>proxylisttable</c>.
        /// </exception>
        public static void GetProxyList(string htmlFile, string jsonFile)
        {
            System.Data.DataTable dt = new System.Data.DataTable();

            string html = null;

            if (System.IO.File.Exists(htmlFile))
            {
                html = System.IO.File.ReadAllText(htmlFile);
            }

            if (html == null)
            {
                using (System.Net.WebClient wc = new System.Net.WebClient())
                {
                    html = wc.DownloadString("https://free-proxy-list.net/");
                    System.IO.File.WriteAllText(htmlFile, html, System.Text.Encoding.UTF8);
                } // End Using wc
            }     // End if (html == null)


            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            string selector = "//table[@id='proxylisttable']";

            HtmlAgilityPack.HtmlNode tableNode = doc.DocumentNode.SelectSingleNode(selector);
            System.Console.WriteLine(tableNode);

            if (tableNode == null)
            {
                // Fail with a meaningful message instead of a NullReferenceException below.
                throw new System.InvalidOperationException("Table 'proxylisttable' was not found in the proxy list HTML.");
            }

            // SelectNodes returns null when nothing matches, so every result is checked before use.
            HtmlAgilityPack.HtmlNodeCollection ths = tableNode.SelectNodes("./thead/tr/th");
            if (ths != null)
            {
                foreach (HtmlAgilityPack.HtmlNode th in ths)
                {
                    dt.Columns.Add(th.InnerText, typeof(string));
                } // Next th
            }


            HtmlAgilityPack.HtmlNodeCollection trs = tableNode.SelectNodes("./tbody/tr");
            if (trs != null)
            {
                foreach (HtmlAgilityPack.HtmlNode tr in trs)
                {
                    System.Data.DataRow dr = dt.NewRow();

                    HtmlAgilityPack.HtmlNodeCollection tds = tr.SelectNodes("./td");
                    if (tds != null)
                    {
                        // Cells beyond the declared header columns have nowhere to go and are ignored.
                        for (int i = 0; i < tds.Count && i < dt.Columns.Count; ++i)
                        {
                            dr[i] = tds[i].InnerText;
                        } // Next td
                    }

                    dt.Rows.Add(dr);
                } // Next tr
            }

            string json = Newtonsoft.Json.JsonConvert.SerializeObject(dt, Newtonsoft.Json.Formatting.Indented);

            System.IO.File.WriteAllText(jsonFile, json, System.Text.Encoding.UTF8);
        } // End Function GetProxyList
예제 #5
0
        /// <summary>
        /// Collects the bolded passages of a post. The post node is cloned, then quotes, colors
        /// other than <c>_voteColor</c>, and line breaks are stripped from the clone before
        /// searching it. With an empty <c>_voteColor</c> only direct-child <c>b</c> elements count;
        /// otherwise bold-inside-font, font-inside-bold and red bold spans are collected.
        /// </summary>
        /// <param name="original">Post node; it is cloned and the clone is what gets modified.</param>
        /// <returns>The list filled by <c>BoldsFromSet</c>; empty when nothing matched.</returns>
        protected List <Bold> ParseBolded(HtmlAgilityPack.HtmlNode original)
        {
            var found = new List <Bold>();

            HtmlAgilityPack.HtmlNode scratch = original.CloneNode("Votes", true);

            // Clean-up passes: quotes first, then colors, then newlines.
            RemoveQuotes(scratch);
            // Only the configured vote color is kept (a list of green shades was once
            // considered here and left commented out).
            var keptColors = new List <String>();
            keptColors.Add(_voteColor);
            RemoveColors(scratch, keptColors);
            RemoveNewlines(scratch);

            if (_voteColor == "")
            {
                // No vote color configured: plain bold directly under the post.
                HtmlAgilityPack.HtmlNodeCollection plainBolds = scratch.SelectNodes("child::b");
                if (plainBolds != null)
                {
                    BoldsFromSet(plainBolds, found);
                }
                return(found);
            }

            // Pattern 1: <font ...><b>...</b></font>
            HtmlAgilityPack.HtmlNodeCollection fonts = scratch.SelectNodes("descendant::font");
            if (fonts != null)
            {
                foreach (HtmlAgilityPack.HtmlNode font in fonts)
                {
                    HtmlAgilityPack.HtmlNodeCollection boldInFont = font.SelectNodes("child::b");
                    if (boldInFont != null)
                    {
                        BoldsFromSet(boldInFont, found);
                    }
                }
            }

            // Pattern 2: <b><font ...>...</font></b>
            HtmlAgilityPack.HtmlNodeCollection allBolds = scratch.SelectNodes("descendant::b");
            if (allBolds != null)
            {
                foreach (HtmlAgilityPack.HtmlNode bold in allBolds)
                {
                    HtmlAgilityPack.HtmlNodeCollection fontInBold = bold.SelectNodes("child::font");
                    if (fontInBold != null)
                    {
                        BoldsFromSet(fontInBold, found);
                    }
                }
            }

            // Pattern 3: span styled red and bold.
            HtmlAgilityPack.HtmlNodeCollection styledSpans = scratch.SelectNodes("descendant::span[starts-with(@style,\"color:red;font-weight:bold;\")]");
            if (styledSpans != null)
            {
                BoldsFromSet(styledSpans, found);
            }

            return(found);
        }
예제 #6
0
        /// <summary>
        /// Parses one page of a forum thread: server time, the total page count from the
        /// "Page X of Y" navigation cell, and every post that converts successfully.
        /// </summary>
        /// <param name="url">Page URL; handed to <c>VBulletinForum.ThreadIdFromUrl</c>.</param>
        /// <param name="doc">Raw HTML of the page.</param>
        /// <param name="lastPageNumber">Y from "Page X of Y", or 0 when the page has no such text.</param>
        /// <param name="serverTime">
        /// Time parsed from the last centered "smallfont" div that contains a <c>span.time</c>,
        /// or the local clock when no such div exists.
        /// </param>
        /// <param name="postList">
        /// Replaced by a new <c>Posts</c> collection when the page contains posts; left untouched otherwise.
        /// </param>
        protected virtual void ParseThreadPage(String url, String doc, out Int32 lastPageNumber, out DateTimeOffset serverTime, ref Posts postList)
        {
            Int32 threadId = VBulletinForum.ThreadIdFromUrl(url);

            lastPageNumber = 0;
            var html = new HtmlAgilityPack.HtmlDocument();

            html.LoadHtml(doc);
            HtmlAgilityPack.HtmlNode root = html.DocumentNode;

            // Fallback used when the page carries no time stamp.
            serverTime = DateTime.Now;
            //(//div[class="smallfont", align="center'])[last()] All times are GMT ... The time is now <span class="time">time</span>"."

            // SelectNodes yields null (not an empty collection) when nothing matches, so the
            // collection must be checked before asking for its last element.
            HtmlAgilityPack.HtmlNodeCollection timeNodes = root.SelectNodes("//div[@class='smallfont'][@align='center']/span[@class='time']/..");
            HtmlAgilityPack.HtmlNode           timeNode  = (timeNodes == null) ? null : timeNodes.LastOrDefault();
            if (timeNode != null)
            {
                String timeText = timeNode.InnerText;
                serverTime = Utils.Misc.ParsePageTime(timeText, DateTime.UtcNow);
            }


            // find total posts: /table/tr[1]/td[2]/div[@class="pagenav"]/table[1]/tr[1]/td[1] -- Page 106 of 106
            HtmlAgilityPack.HtmlNode pageNode = root.SelectSingleNode("//div[@class='pagenav']/table/tr/td");
            if (pageNode != null)
            {
                string pages = pageNode.InnerText;
                Match  m     = Regex.Match(pages, @"Page (\d+) of (\d+)");
                if (m.Success)
                {
                    // TryParse keeps an absurdly long digit run from throwing OverflowException;
                    // the page count then stays at 0.
                    Int32 parsedPages;
                    if (Int32.TryParse(m.Groups[2].Value, out parsedPages))
                    {
                        lastPageNumber = parsedPages;
                    }
                }
            }

            // //div[@id='posts']/div/div/div/div/table/tbody/tr[2]
            // td[1]/div[1] has (id with post #, <a> with user id, user name.)
            // td[2]/div[1] has title
            // td[2]/div[2] has post
            // "/html[1]/body[1]/table[2]/tr[2]/td[1]/td[1]/div[2]/div[1]/div[1]/div[1]/div[1]/table[1]/tr[2]/td[2]/div[2]" is a post
            HtmlAgilityPack.HtmlNodeCollection posts = root.SelectNodes("//div[@id='posts']//div[contains(@id, 'edit')]/table/tr[2]/td[2]/div[contains(@id, 'post_message_')]");
            if (posts == null)
            {
                // No posts on this page; postList is intentionally left as the caller passed it.
                return;
            }
            postList = new Posts();
            foreach (HtmlAgilityPack.HtmlNode post in posts)
            {
                Post p = HtmlToPost(threadId, post, serverTime);
                if (p != null)
                {
                    postList.Add(p);
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Locates header cells of <paramref name="table"/> whose text contains one of
        /// <paramref name="vals"/>. The header row is the first direct <c>tr</c> child, or the
        /// first <c>tr</c> inside a direct <c>thead</c> child when the table has no direct rows.
        /// </summary>
        /// <param name="table">Table node to inspect.</param>
        /// <param name="vals">Substrings to look for; the first one found in a cell wins for that cell.</param>
        /// <returns>
        /// One pair per matching cell: key = absolute column index (earlier cells' <c>colspan</c>
        /// values are added up), value = index into <paramref name="vals"/>. Empty when the
        /// table has no header cells.
        /// </returns>
        public List <KeyValuePair <int, int> > GetTableHeaderIndexes(HtmlAgilityPack.HtmlNode table, params String[] vals)
        {
            List <KeyValuePair <int, int> > ret = new List <KeyValuePair <int, int> >();

            if (table == null || vals == null)
            {
                return(ret);
            }

            // SelectNodes returns null (never an empty collection) when nothing matches,
            // so a non-null result always has an element at index 0.
            HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes("tr");
            if (rows == null)
            {
                HtmlAgilityPack.HtmlNode thead = table.SelectSingleNode("thead");
                if (thead != null)
                {
                    rows = thead.SelectNodes("tr");
                }
            }

            HtmlAgilityPack.HtmlNodeCollection cells = (rows == null) ? null : rows[0].SelectNodes("th|td");
            if (cells == null)
            {
                // No header row or no cells in it: nothing can match.
                return(ret);
            }

            int colAbsIdx = 0;

            for (int i = 0; i < cells.Count; ++i)
            {
                HtmlAgilityPack.HtmlNode cell = cells[i];
                String txt = cell.InnerText.Trim();
                for (int j = 0; j < vals.Length; ++j)
                {
                    String match = vals[j];
                    if (match != null && txt.IndexOf(match) > -1)
                    {
                        ret.Add(new KeyValuePair <int, int>(colAbsIdx, j));
                        break;
                    }
                }

                // A cell occupies at least one column; unparsable or non-positive colspan counts as 1.
                HtmlAgilityPack.HtmlAttribute colspan_attr = cell.Attributes["colspan"];
                int colSpan = 1;
                if (colspan_attr != null)
                {
                    if (!int.TryParse(colspan_attr.Value, out colSpan) || colSpan < 1)
                    {
                        colSpan = 1;
                    }
                }
                colAbsIdx += colSpan;
            }
            return(ret);
        }
예제 #8
0
        /// <summary>
        /// Posts the named <c>input</c> fields of a form to the form's <c>action</c> URL and
        /// returns the response body decoded with <paramref name="encoding"/>.
        /// </summary>
        /// <param name="form">The form node whose inputs are submitted.</param>
        /// <param name="encoding">Encoding used for the request body and for decoding the response.</param>
        /// <returns>The decoded response text.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="form"/> or <paramref name="encoding"/> is null.
        /// </exception>
        public static string Submit(this HtmlAgilityPack.HtmlNode form, Encoding encoding)
        {
            if (form == null)
            {
                throw new ArgumentNullException(nameof(form));
            }
            if (encoding == null)
            {
                throw new ArgumentNullException(nameof(encoding));
            }

            var postData = new System.Collections.Generic.Dictionary <string, string>();

            // ".//input" restricts the search to this form; a leading "//" would select every
            // input of the whole document. SelectNodes returns null when the form has no inputs.
            HtmlAgilityPack.HtmlNodeCollection inputs = form.SelectNodes(".//input");
            if (inputs != null)
            {
                foreach (HtmlAgilityPack.HtmlNode input in inputs)
                {
                    string name = input.GetAttributeValue("name", string.Empty);
                    if (name.Length == 0)
                    {
                        // Inputs without a name are not part of a form submission.
                        continue;
                    }

                    // Indexer assignment: a repeated name overwrites instead of throwing.
                    postData[name] = HttpUtility.UrlEncode(input.GetAttributeValue("value", string.Empty));
                }
            }

            string postDataStr = postData.SerializeData <string>();

            byte[] responseData = HttpRequestUtil.Post(form.GetAttributeValue("action", null),
                                                       encoding.GetBytes(postDataStr)
                                                       );

            return(encoding.GetString(responseData));
        }
예제 #9
0
 /// <summary>
 /// Removes every <c>br</c> element found below <paramref name="node"/>.
 /// </summary>
 /// <param name="node">Node whose descendants are searched.</param>
 public static void RemoveNewlines(HtmlAgilityPack.HtmlNode node)
 {
     HtmlAgilityPack.HtmlNodeCollection lineBreaks = node.SelectNodes("descendant::br");
     if (lineBreaks == null)
     {
         // Nothing matched.
         return;
     }

     foreach (HtmlAgilityPack.HtmlNode lineBreak in lineBreaks)
     {
         lineBreak.Remove();
     }
 }
예제 #10
0
        /// <summary>
        /// Builds a reward atom from a table node. Each <c>tr</c> below the node contributes a
        /// reward type (looked up from the row's first image source in
        /// <c>RewardTypeFromImageSrc</c>) and an amount (the number matched in the row text).
        /// Rows missing either part are ignored; a repeated type overwrites the earlier amount.
        /// </summary>
        /// <param name="htmlNode">Node to read; null yields null.</param>
        /// <returns>
        /// A <c>DialogueMissionRewardAtom</c> carrying the node's outer HTML and the ISK and LP
        /// amounts, or null when <paramref name="htmlNode"/> is null.
        /// </returns>
        static public IDialogueMissionRewardAtom RewardAtomFromHtmlNode(this HtmlAgilityPack.HtmlNode htmlNode)
        {
            if (htmlNode == null)
            {
                return(null);
            }

            var amountByRewardType = new Dictionary <RewardTypeEnum, int>();

            foreach (var row in htmlNode.SelectNodes(".//tr").EmptyIfNull())
            {
                var imageSource = row?.SelectSingleNode(".//img")?.GetAttributeValue("src", "");
                var rewardType  = RewardTypeFromImageSrc.TryGetValueNullable(imageSource);

                var amountMatch = row?.InnerText?.RegexMatchIfSuccess(Number.DefaultNumberFormatRegexAllowLeadingAndTrailingChars);
                var amount      = (int?)amountMatch?.Value?.NumberParseDecimal();

                if (rewardType.HasValue && amount.HasValue)
                {
                    amountByRewardType[rewardType.Value] = amount.Value;
                }
            }

            var iskAmount = amountByRewardType.TryGetValueNullable(RewardTypeEnum.ISK);
            var lpAmount  = amountByRewardType.TryGetValueNullable(RewardTypeEnum.LP);

            return(new DialogueMissionRewardAtom()
            {
                Html = htmlNode.OuterHtml,
                ISK = iskAmount,
                LP = lpAmount,
            });
        }
예제 #11
0
        /// <summary>
        /// Builds a mission objective from a table row node. The first cell's HTML decides the
        /// completion flag ("38_193" sets true, "38_195" sets false, the latter winning when
        /// both occur), the third cell's text decides the objective type, and the last cell
        /// supplies the location or item depending on that type.
        /// </summary>
        /// <param name="htmlNode">Node to read; null yields null.</param>
        /// <returns>
        /// The parsed objective, or null when <paramref name="htmlNode"/> is null or any
        /// exception occurs while parsing.
        /// </returns>
        static public DialogueMissionObjective ParseObjectiveAtom(this HtmlAgilityPack.HtmlNode htmlNode)
        {
            if (htmlNode == null)
            {
                return(null);
            }

            try
            {
                var cells = htmlNode.SelectNodes(".//td");

                var completionHtml = cells?.FirstOrDefault()?.InnerHtml;

                bool?completeSelf = null;

                if (completionHtml?.RegexMatchSuccessIgnoreCase(Regex.Escape("38_193")) ?? false)
                {
                    completeSelf = true;
                }

                // Checked second so that it overrides the marker above when both are present.
                if (completionHtml?.RegexMatchSuccessIgnoreCase(Regex.Escape("38_195")) ?? false)
                {
                    completeSelf = false;
                }

                var typeCell = cells?.ElementAtOrDefault(2);
                var lastCell = cells?.LastOrDefault();

                var typeEnum = typeCell?.InnerText?.Trim()?.ObjectiveAtomTypeEnumFromTableDialogueText();

                DialogueMissionLocation      location = null;
                DialogueMissionObjectiveItem item     = null;

                var locationKinds = new[] { DialogueMissionObjectiveAtomTypeEnum.Location, DialogueMissionObjectiveAtomTypeEnum.LocationPickUp, DialogueMissionObjectiveAtomTypeEnum.LocationDropOff }.CastToNullable();
                if (locationKinds.Contains(typeEnum))
                {
                    location = MissionLocationFromDialogue(lastCell);
                }

                var itemKinds = new[] { DialogueMissionObjectiveAtomTypeEnum.Item, DialogueMissionObjectiveAtomTypeEnum.Cargo }.CastToNullable();
                if (itemKinds.Contains(typeEnum))
                {
                    item = ObjectiveItemFromDialogueText(lastCell?.InnerText);
                }

                return(new DialogueMissionObjective()
                {
                    Html = htmlNode.OuterHtml,
                    TypeEnum = typeEnum,
                    Location = location,
                    Item = item,
                    CompleteSelf = completeSelf,
                });
            }
            catch
            {
                // Any failure while parsing is reported to the caller as "no objective".
                return(null);
            }
        }
예제 #12
0
        /// <summary>
        /// Evaluates <c>_xpath</c> against <paramref name="element"/> and returns one string per
        /// match: the trimmed value of the <c>_attribute</c> attribute when <c>HasAttribute()</c>
        /// is true (matches lacking that attribute are skipped), otherwise the trimmed outer HTML.
        /// </summary>
        /// <param name="element">Node the XPath is evaluated against.</param>
        /// <returns>The collected strings; empty when nothing matched.</returns>
        public override IList <string> SelectList(HtmlAgilityPack.HtmlNode element)
        {
            var values  = new List <string>();
            var matches = element.SelectNodes(_xpath);

            if (matches == null)
            {
                return(values);
            }

            foreach (var match in matches)
            {
                if (HasAttribute())
                {
                    var attribute = match.Attributes[_attribute];
                    if (attribute != null)
                    {
                        values.Add(attribute.Value?.Trim());
                    }
                    continue;
                }

                values.Add(match.OuterHtml?.Trim());
            }
            return(values);
        }
예제 #13
0
        /// <summary>
        /// Extracts the value described by <paramref name="field"/> and stores it on
        /// <paramref name="data"/> under the field's name. A single field stores the result of
        /// <c>GetHtmlNodeValue</c>; a group field stores a list with one object per node matched
        /// by the field's XPath, each filled recursively from the field's children.
        /// </summary>
        /// <param name="field">Field definition (name, XPath, type, inherit flag, children).</param>
        /// <param name="data">Dynamic object that receives the value.</param>
        /// <param name="root">Document root; group XPaths are evaluated against it.</param>
        /// <param name="parentNode">Node of the enclosing group item; used for single fields with <c>Inherit</c> set.</param>
        private void ExpandFieldData(ThinkCrawlField field, dynamic data, HtmlAgilityPack.HtmlNode root, HtmlAgilityPack.HtmlNode parentNode = null)
        {
            string name       = field.Name;
            string fieldXPath = field.XPath;

            if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(fieldXPath))
            {
                // Incomplete definition: record an empty value and stop. Without this return the
                // code below would overwrite the value or evaluate an empty XPath.
                SetDynamicValue(data, name, "");
                return;
            }

            if (field.Type == ThinkCrawlFieldType.Single)
            {
                SetDynamicValue(data, name, GetHtmlNodeValue(field.Inherit ? parentNode : root, field));
            }
            else if (field.Type == ThinkCrawlFieldType.Group)
            {
                List <dynamic> childList = new List <dynamic>();
                var            nodes     = root.SelectNodes(fieldXPath);
                if (nodes != null && nodes.Count > 0 && field.Children != null)
                {
                    foreach (var node in nodes)
                    {
                        dynamic childData = new ExpandoObject();
                        foreach (var childField in field.Children)
                        {
                            // Children see the same root; the matched node becomes their parent.
                            ExpandFieldData(childField, childData, root, node);
                        }
                        childList.Add(childData);
                    }
                }
                SetDynamicValue(data, name, childList);
            }
        }
예제 #14
0
        /// <summary>
        /// Extracts definition text from a dictionary page. For every
        /// <c>section.def-pbk.ce-spot</c> inside <c>div.def-list</c> it emits the text of the
        /// <c>span.dbox-pg</c> headers followed by the text of each <c>div.def-set</c> with
        /// inline example blocks and colons removed. The result is also written to the console.
        /// </summary>
        /// <param name="doc">Parsed HTML document.</param>
        /// <returns>The collected text; empty when the page has no definition list.</returns>
        public static string extractDef(HtmlAgilityPack.HtmlDocument doc)
        {
            // Built once instead of once per matched node.
            Regex tagRun       = new Regex("(<.*?>\\s*)+", RegexOptions.Singleline);
            Regex exampleBlock = new Regex("(.?<div class=\"def-block def-inline-example\">?.*?</div>)+", RegexOptions.Singleline);

            var resultText = new System.Text.StringBuilder();

            HtmlAgilityPack.HtmlNode defList = doc.DocumentNode.SelectSingleNode("//div[@class='def-list']");

            // SelectSingleNode / SelectNodes return null when nothing matches; each level is checked.
            HtmlAgilityPack.HtmlNodeCollection sections = (defList == null) ? null : defList.SelectNodes(".//section[@class='def-pbk ce-spot']");
            if (sections != null)
            {
                foreach (HtmlAgilityPack.HtmlNode section in sections)
                {
                    HtmlAgilityPack.HtmlNodeCollection headers = section.SelectNodes(".//header[@class='luna-data-header']");
                    if (headers != null)
                    {
                        foreach (HtmlAgilityPack.HtmlNode header in headers)
                        {
                            HtmlAgilityPack.HtmlNodeCollection labels = header.SelectNodes(".//span[@class='dbox-pg']");
                            if (labels == null)
                            {
                                continue;
                            }
                            foreach (HtmlAgilityPack.HtmlNode label in labels)
                            {
                                string labelText = tagRun.Replace(label.OuterHtml, " ").Trim();
                                resultText.Append("\n").Append(labelText).Append("\n");
                            }
                        }
                    }

                    HtmlAgilityPack.HtmlNodeCollection defSets = section.SelectNodes(".//div[@class='def-set']");
                    if (defSets != null)
                    {
                        foreach (HtmlAgilityPack.HtmlNode defSet in defSets)
                        {
                            string defText = exampleBlock.Replace(defSet.OuterHtml, "").Trim(); // drop inline examples
                            defText = tagRun.Replace(defText, " ").Trim();                      // tags -> single space
                            defText = defText.Replace(":", "").Trim();                          // same effect as the former ":?" regex
                            resultText.Append(defText).Append("\n ");
                        }
                    }
                }
            }

            string result = resultText.ToString();
            Console.WriteLine(result + "\n");
            return(result);
        }
예제 #15
0
 /// <summary>
 /// Removes every comment node found below <paramref name="node"/>.
 /// </summary>
 /// <remarks>
 /// The search uses the <c>descendant::</c> axis so that only the given subtree is touched;
 /// an XPath starting with <c>//</c> is evaluated from the document root regardless of the
 /// node it is called on.
 /// </remarks>
 /// <param name="node">Node whose descendants are searched.</param>
 public static void RemoveComments(HtmlAgilityPack.HtmlNode node)
 {
     HtmlAgilityPack.HtmlNodeCollection comments = node.SelectNodes("descendant::comment()");
     if (comments == null)
     {
         // SelectNodes returns null when nothing matches.
         return;
     }

     foreach (HtmlAgilityPack.HtmlNode comment in comments)
     {
         comment.Remove();
     }
 }
예제 #16
0
        /// <summary>
        /// Builds the POST parameters for the login form with id <c>gaia_loginform</c>: every
        /// named input of the form with its current value, with the <c>Email</c> and
        /// <c>Passwd</c> fields replaced by the given credentials. A <c>Passwd</c> entry is
        /// appended when the form has no such input.
        /// </summary>
        /// <param name="body">HTML of the login page.</param>
        /// <param name="username">Value for the <c>Email</c> field.</param>
        /// <param name="password">Value for the <c>Passwd</c> field.</param>
        /// <returns>Name/value pairs in document order.</returns>
        /// <exception cref="InvalidOperationException">The page contains no <c>gaia_loginform</c> element.</exception>
        private List <KeyValuePair <string, string> > getLoginFormParams(string body, string username, string password)
        {
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(body);

            HtmlAgilityPack.HtmlNode loginform = doc.GetElementbyId("gaia_loginform");
            if (loginform == null)
            {
                throw new InvalidOperationException("Login form 'gaia_loginform' was not found in the page.");
            }

            // ".//input" keeps the search inside the form; "//input" would pick up every input
            // of the document. SelectNodes returns null when the form has no inputs.
            HtmlAgilityPack.HtmlNodeCollection    inputElements = loginform.SelectNodes(".//input");
            List <KeyValuePair <string, string> > paramList     = new List <KeyValuePair <string, string> >();
            bool hasPassword = false;

            if (inputElements != null)
            {
                foreach (HtmlAgilityPack.HtmlNode input in inputElements)
                {
                    string name  = input.GetAttributeValue("name", string.Empty);
                    string value = input.GetAttributeValue("value", string.Empty);
                    if (name.Length == 0)
                    {
                        // Inputs without a name are not part of a form submission.
                        continue;
                    }
                    if (name == "Email")
                    {
                        value = username;
                    }
                    else if (name == "Passwd")
                    {
                        hasPassword = true;
                        value       = password;
                    }
                    paramList.Add(new KeyValuePair <string, string>(name, value));
                }
            }
            if (!hasPassword)
            {
                paramList.Add(new KeyValuePair <string, string>("Passwd", password));
            }
            return(paramList);
        }
예제 #17
0
 /// <summary>
 /// Removes quoted passages below <paramref name="node"/>: for every <c>td</c> with class
 /// <c>alt2</c>, the element three levels above it is removed. Cells that do not have three
 /// ancestors, or whose third ancestor is already detached, are left alone.
 /// </summary>
 /// <param name="node">Node whose descendants are searched.</param>
 public static void RemoveQuotes(HtmlAgilityPack.HtmlNode node)
 {
     HtmlAgilityPack.HtmlNodeCollection quoteCells = node.SelectNodes("descendant::td[@class='alt2']");
     if (quoteCells == null)
     {
         // SelectNodes returns null when nothing matches.
         return;
     }

     foreach (HtmlAgilityPack.HtmlNode cell in quoteCells)
     {
         // Walk up three levels, stopping early if the chain ends.
         HtmlAgilityPack.HtmlNode container = cell;
         for (int level = 0; level < 3 && container != null; ++level)
         {
             container = container.ParentNode;
         }

         if (container != null && container.ParentNode != null)
         {
             container.Remove();
         }
     }
 }
예제 #18
0
        /// <summary>
        /// Downloads a dianping.com category page with a fixed set of request headers and
        /// writes one "name---review count---price" line per listing to the HTTP response.
        /// Listings that lack the expected title/comment markup are skipped.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // NOTE(review): the header block below appears twice inside the literal and contains
            // "…" characters, which looks like a truncated copy-paste — confirm the intended headers.
            string heads = @"Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8
Accept-Encoding:gzip, deflate
Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
Cache-Control:max-age=0
Connection:keep-alive
Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20
Host:www.dianping.com
Upgrade-Insecure-Requests:1
Accept:text/html,application/xhtml+xm…plication/xml;q=0.9,*/*;q=0.8
Accept-Encoding:gzip, deflate
Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
Cache-Control:max-age=0
Connection:keep-alive
Cookie:cy=1; _lxsdk_cuid=15ffc822338c…3fb990e3e-b37-f9f-cd5%7C%7C20
Host:www.dianping.com
Upgrade-Insecure-Requests:1
User-Agent:Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/60.0";

            string url = "http://www.dianping.com/shanghai/ch75/g3032";
            ClassHttpRequestClient s = new ClassHttpRequestClient(true);
            string content           = "";
            string response          = s.httpPost(url, heads, content, Encoding.UTF8);

            // Step 1: create the HtmlAgilityPack document.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            // Step 2: load the downloaded HTML (an empty document when the request returned nothing).
            doc.LoadHtml(response ?? string.Empty);

            // SelectNodes returns null when nothing matches, e.g. on an error or captcha page.
            HtmlAgilityPack.HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//div[@class=\"txt\"]");
            StringBuilder sb = new StringBuilder();

            if (collection != null)
            {
                foreach (HtmlAgilityPack.HtmlNode item in collection)
                {
                    HtmlAgilityPack.HtmlNode divtit     = item.SelectSingleNode("div[@class=\"tit\"]");
                    HtmlAgilityPack.HtmlNode divcomment = item.SelectSingleNode("div[@class=\"comment\"]");
                    if (divtit == null || divcomment == null)
                    {
                        continue;
                    }

                    HtmlAgilityPack.HtmlNode aname  = divtit.SelectSingleNode("a[1]");     // first link under the title div
                    HtmlAgilityPack.HtmlNode anum   = divcomment.SelectSingleNode("a[1]"); // first link under the comment div
                    HtmlAgilityPack.HtmlNode aprice = divcomment.SelectSingleNode("a[2]"); // second link under the comment div
                    if (aname == null || anum == null || aprice == null)
                    {
                        continue;
                    }

                    sb.AppendFormat("{0}---{1}---{2}</br>", aname.InnerText, anum.InnerText, aprice.InnerText);
                }
            }
            Response.Write(sb);
        }
예제 #19
0
        /// <summary>
        /// Fills the <c>psid</c> column of <paramref name="dt"/> from the edit links of an HTML
        /// table. For every processed HTML row, the first child of the cell at
        /// <paramref name="editColIndex"/> is expected to carry an <c>href</c> whose query
        /// string contains <c>psid</c>; rows without such a link are left unchanged.
        /// </summary>
        /// <param name="html">HTML document text.</param>
        /// <param name="tableId">Id of the table element; nothing happens when it is not found.</param>
        /// <param name="editColIndex">Zero-based index of the cell holding the edit link.</param>
        /// <param name="dt">
        /// Target table; its <c>prp_skipRowTop</c>, <c>prp_skipRowBottom</c> and
        /// <c>prp_skipRowIndecies</c> settings select which HTML rows are processed.
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="html"/> or <paramref name="tableId"/> is null or empty, or <paramref name="dt"/> is null.
        /// </exception>
        private void sb_fillDatatablePSID(string html, string tableId, int editColIndex, Model.htmlTable dt)
        {
            if (string.IsNullOrEmpty(html))
            {
                throw new ArgumentException("html is null or empty");
            }
            if (string.IsNullOrEmpty(tableId))
            {
                throw new ArgumentException("tableId is null or empty");
            }
            if (dt == null)
            {
                throw new ArgumentException("dt is null");
            }

            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(html);
            var table = document.GetElementbyId(tableId);
            if (table == null)
            {
                return;
            }

            // Rows live either under an explicit tbody or directly under the table.
            HtmlAgilityPack.HtmlNode           tbody = table.SelectSingleNode("tbody");
            HtmlAgilityPack.HtmlNodeCollection rows  = (tbody != null) ? tbody.SelectNodes("tr") : table.SelectNodes("tr");
            if (rows == null)
            {
                // SelectNodes returns null when the table has no rows.
                return;
            }

            int skipRowCountAtBegining = dt.prp_skipRowTop;
            int skipRowCountAtEnd      = dt.prp_skipRowBottom;

            for (int i = skipRowCountAtBegining; i <= rows.Count - 1 - skipRowCountAtEnd; i++)
            {
                if (dt.prp_skipRowIndecies != null && dt.prp_skipRowIndecies.Any(o => o == i))
                {
                    continue;
                }

                var cells = rows[i].SelectNodes("td");
                if (cells == null || editColIndex < 0 || editColIndex >= cells.Count)
                {
                    continue;
                }

                HtmlAgilityPack.HtmlNode editCell = cells[editColIndex];
                if (editCell.ChildNodes.Count == 0)
                {
                    continue;
                }

                HtmlAgilityPack.HtmlAttribute href = editCell.ChildNodes[0].Attributes["href"];
                if (href == null)
                {
                    continue;
                }

                // The query string is the part after the first '?'; links without one carry no psid.
                string[] urlParts = href.Value.Split('?');
                if (urlParts.Length < 2)
                {
                    continue;
                }

                var queryStringCollection = HttpUtility.ParseQueryString(urlParts[1]);
                if (queryStringCollection != null && !Functions.IsNull(queryStringCollection["psid"]))
                {
                    // NOTE(review): the target row index ignores rows skipped via
                    // prp_skipRowIndecies — confirm this matches how dt was populated.
                    DataRow dr = dt.Rows[i - skipRowCountAtBegining];
                    dr[wagonPartsGroupsDataTable.fld_psid] = queryStringCollection["psid"];
                }
            }
        }
예제 #20
0
        /// <summary>
        /// Loads all orders of one year. The first page is fetched from <paramref name="url"/>
        /// formatted with <paramref name="year"/>; when it contains an <c>a-pagination</c> list,
        /// every linked page (except the trailing "next" link) is fetched and scanned,
        /// otherwise the first page itself is scanned.
        /// </summary>
        /// <param name="year">Year inserted into <paramref name="url"/>.</param>
        /// <param name="url">Composite format string for the order-history URL.</param>
        /// <returns>Orders returned by <c>ScanOrders</c> for all scanned pages.</returns>
        public List <Order> LoadYear(int year, string url)
        {
            List <Order>  result     = new List <Order>();
            List <string> orderPages = new List <string>();
            string        yearUrl    = string.Format(url, year);
            string        prefix     = new Uri(yearUrl).GetComponents(UriComponents.SchemeAndServer, UriFormat.SafeUnescaped);

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(yearUrl);

            request.CookieContainer = cookies;

            // The response owns the connection; dispose it so the connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                cookies.Add(response.Cookies); // for further requests

                using (StreamReader sr = new StreamReader(response.GetResponseStream()))
                {
                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(sr.ReadToEnd());

                    HtmlAgilityPack.HtmlNode node = doc.DocumentNode.SelectSingleNode("//ul[@class='a-pagination']");
                    if (node != null)
                    {
                        // SelectNodes returns null when the pagination list has no links.
                        HtmlAgilityPack.HtmlNodeCollection links = node.SelectNodes(".//a[@href]");
                        if (links != null)
                        {
                            foreach (var link in links)
                            {
                                orderPages.Add(link.Attributes["href"].Value.Trim());
                            }
                        }
                        if (orderPages.Count > 1)
                        {
                            orderPages.RemoveAt(orderPages.Count - 1); // last link in list is next button
                        }
                    }
                    else
                    {
                        result.AddRange(ScanOrders(doc.DocumentNode.SelectSingleNode("//div[@id='ordersContainer']"), prefix));
                    }
                }
            }

            for (int i = 0; i < orderPages.Count; i++)
            {
                Console.WriteLine("\tpage {0}...", i + 1);
                // Relative links are resolved against the scheme and host of the first request.
                string page_url = orderPages[i].StartsWith("http", StringComparison.Ordinal) ? orderPages[i] : prefix + orderPages[i];
                HttpWebRequest pageRequest = (HttpWebRequest)WebRequest.Create(page_url);
                pageRequest.CookieContainer = cookies;

                using (HttpWebResponse pageResponse = (HttpWebResponse)pageRequest.GetResponse())
                {
                    cookies.Add(pageResponse.Cookies); // for further requests

                    using (StreamReader sr = new StreamReader(pageResponse.GetResponseStream()))
                    {
                        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                        doc.LoadHtml(sr.ReadToEnd());
                        result.AddRange(ScanOrders(doc.DocumentNode.SelectSingleNode("//div[@id='ordersContainer']"), prefix));
                    }
                }
            }

            return(result);
        }
예제 #21
0
 /// <summary>
 /// Returns the first table whose header row contains a contiguous run of cells
 /// matching <paramref name="vals"/>: cell k of the run must contain vals[k] as a substring.
 /// The header row is the table's first direct <c>tr</c>, or failing that the first
 /// <c>tr</c> of its first <c>thead</c>.
 /// </summary>
 /// <param name="vals">Header fragments to look for, in order.</param>
 /// <returns>The matching table node, or null when no table matches.</returns>
 public HtmlAgilityPack.HtmlNode findTableByHeaderPattern(params String[] vals)
 {
     HtmlAgilityPack.HtmlNodeCollection tables = GetElementsByTagName("table");
     if (tables == null)
     {
         return(null);
     }
     foreach (HtmlAgilityPack.HtmlNode table in tables)
     {
         HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes("tr");
         if (rows == null)
         {
             HtmlAgilityPack.HtmlNodeCollection thead = table.SelectNodes("thead");
             if (thead != null)
             {
                 rows = thead[0].SelectNodes("tr");
             }
         }
         if (rows == null)
         {
             continue; // table has no header row to inspect
         }

         HtmlAgilityPack.HtmlNodeCollection cells = rows[0].SelectNodes("th|td");
         if (cells == null)
         {
             continue; // header row has no cells
         }

         // Try every possible start position so a partial match that fails does not
         // hide a full match beginning inside it.
         int lastStart = cells.Count - vals.Length;
         for (int start = 0; start <= lastStart; ++start)
         {
             int matched = 0;
             while (matched < vals.Length && cells[start + matched].InnerText.IndexOf(vals[matched]) != -1)
             {
                 ++matched;
             }
             if (matched == vals.Length)
             {
                 return(table);
             }
         }
     }
     return(null);
 }
예제 #22
0
        /// <summary>
        /// Builds the video list for a category. With an empty <paramref name="pageUrl"/> the
        /// videos are read from the node stored in <c>category.Other</c>; otherwise the page at
        /// <paramref name="pageUrl"/> is downloaded, parsed, and <c>nextPageUrl</c> is updated
        /// from the page's "pager-next" link (left empty when there is none).
        /// </summary>
        /// <param name="category">Category whose <c>Other</c> may hold a pre-parsed HTML node.</param>
        /// <param name="pageUrl">Address of the page to load, or null/empty to use the category node.</param>
        /// <returns>The videos found; empty when nothing matched.</returns>
        private List <VideoInfo> GetPageVideos(RssLink category, String pageUrl)
        {
            List <VideoInfo> pageVideos = new List <VideoInfo>();

            if (String.IsNullOrEmpty(pageUrl) && (category.Other != null))
            {
                HtmlAgilityPack.HtmlNode root = (HtmlAgilityPack.HtmlNode)category.Other;

                AppendArticleVideos(root, pageVideos);
            }
            else if (!String.IsNullOrEmpty(pageUrl))
            {
                this.nextPageUrl = String.Empty;
                String baseWebData = GetWebData(pageUrl, forceUTF8: true);
                HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
                document.LoadHtml(baseWebData);

                AppendArticleVideos(document.DocumentNode, pageVideos);

                HtmlAgilityPack.HtmlNode nextPageLink = document.DocumentNode.SelectSingleNode(".//li[@class='pager-next']/a");
                if ((nextPageLink != null) && (nextPageLink.Attributes["href"] != null))
                {
                    this.nextPageUrl = Utils.FormatAbsoluteUrl(System.Web.HttpUtility.HtmlDecode(nextPageLink.Attributes["href"].Value), pageUrl);
                }
            }

            return(pageVideos);
        }

        // Appends one VideoInfo per 'article-default' div below the given node.
        // Entries without a title link are skipped; a missing thumbnail leaves Thumb unset.
        private static void AppendArticleVideos(HtmlAgilityPack.HtmlNode container, List <VideoInfo> target)
        {
            // SelectNodes yields null (not an empty collection) when nothing matches.
            HtmlAgilityPack.HtmlNodeCollection shows = container.SelectNodes(".//div[contains(@class, 'article-default')]");
            if (shows == null)
            {
                return;
            }

            foreach (var show in shows)
            {
                HtmlAgilityPack.HtmlNode linkNode  = show.SelectSingleNode(".//h3/a");
                HtmlAgilityPack.HtmlNode thumbNode = show.SelectSingleNode(".//img");

                if ((linkNode == null) || (linkNode.Attributes["href"] == null))
                {
                    continue;
                }

                VideoInfo videoInfo = new VideoInfo()
                {
                    Title    = linkNode.InnerText,
                    VideoUrl = Utils.FormatAbsoluteUrl(linkNode.Attributes["href"].Value, ApetitTvUtil.baseUrl)
                };

                if ((thumbNode != null) && (thumbNode.Attributes["src"] != null))
                {
                    videoInfo.Thumb = Utils.FormatAbsoluteUrl(thumbNode.Attributes["src"].Value, ApetitTvUtil.baseUrl);
                }

                target.Add(videoInfo);
            }
        }
예제 #23
0
 /// <summary>
 /// Removes every descendant <c>font</c> element of <paramref name="node"/> whose
 /// <c>color</c> attribute, lower-cased, is not in <paramref name="exemptList"/>.
 /// A <c>font</c> element without a color attribute is looked up as "nocolor".
 /// </summary>
 /// <param name="node">Root node whose descendants are inspected.</param>
 /// <param name="exemptList">Color values to keep; compared against the lower-cased attribute value.</param>
 public static void RemoveColors(HtmlAgilityPack.HtmlNode node, IEnumerable <String> exemptList)
 {
     foreach (var n in node.SelectNodes("descendant::font") ?? new HtmlAgilityPack.HtmlNodeCollection(node))
     {
         // Invariant casing: a culture-sensitive ToLower() would turn "I" into a dotless
         // i under Turkish cultures and break the lookup for names such as "WHITE".
         String color = n.GetAttributeValue("color", "nocolor").ToLowerInvariant();
         if (!exemptList.Contains(color))
         {
             n.Remove();
         }
     }
 }
예제 #24
0
        /// <summary>
        /// Extracts orders from the given container node. Each descendant div whose class
        /// contains 'order' is inspected; its sum, id and date are read from the 'order-info'
        /// block, and an order is kept only when <c>IsInitialized()</c> reports true.
        /// Products are collected for kept orders.
        /// </summary>
        /// <param name="node">Container to scan; a null node yields an empty list.</param>
        /// <param name="prefix">Prepended to product links that do not start with "http".</param>
        /// <returns>The orders found; never null.</returns>
        private List <Order> ScanOrders(HtmlAgilityPack.HtmlNode node, string prefix)
        {
            List <Order> orders = new List <Order>();

            if (node == null)
            {
                return(orders);
            }

            // SelectNodes yields null (not an empty collection) when nothing matches.
            HtmlAgilityPack.HtmlNodeCollection orderNodes = node.SelectNodes(".//div[contains(@class, 'order')]");
            if (orderNodes == null)
            {
                return(orders);
            }

            foreach (HtmlAgilityPack.HtmlNode order in orderNodes)
            {
                HtmlAgilityPack.HtmlNode info = order.SelectSingleNode(".//div[contains(@class, 'order-info')]");
                Order o = new Order();

                if (info != null)
                {
                    HtmlAgilityPack.HtmlNode price = info.SelectSingleNode(".//div[contains(@class, 'a-span2')]//span[contains(@class, 'value')]");
                    if (price != null)
                    {
                        o.Sum = ScanPrice(price.InnerText.Trim());
                    }

                    HtmlAgilityPack.HtmlNode id = info.SelectSingleNode(".//div[contains(@class, 'a-col-right')]//span[contains(@class, 'value')]");
                    if (id != null)
                    {
                        o.Id = id.InnerText.Trim();
                    }

                    HtmlAgilityPack.HtmlNode date = info.SelectSingleNode(".//div[contains(@class, 'a-span4')]//span[contains(@class, 'value')]");
                    if (date != null)
                    {
                        o.Date = ScanDate(date.InnerText.Trim());
                    }
                }

                if (!o.IsInitialized())
                {
                    continue;
                }

                HtmlAgilityPack.HtmlNodeCollection productNodes = order.SelectNodes(".//div[contains(@class, 'a-spacing')]//div[contains(@class, 'a-col-right')]");
                if (productNodes != null)
                {
                    foreach (HtmlAgilityPack.HtmlNode product in productNodes)
                    {
                        HtmlAgilityPack.HtmlNode name  = product.SelectSingleNode(".//a[contains(@class, 'a-link-normal')]");
                        HtmlAgilityPack.HtmlNode price = product.SelectSingleNode(".//span[contains(@class, 'a-color-price')]");
                        if ((name == null) || (price == null) || (name.Attributes["href"] == null))
                        {
                            continue;
                        }

                        string href = name.Attributes["href"].Value;

                        Product p = new Product();
                        p.Price = ScanPrice(price.InnerText.Trim());
                        p.Url   = href.StartsWith("http", StringComparison.Ordinal) ? href : prefix + href;
                        p.Name  = WebUtility.HtmlDecode(name.InnerText.Trim());
                        o.Products.Add(p);
                    }
                }

                orders.Add(o);
            }

            return(orders);
        }
예제 #25
0
        /// <summary>
        /// Reads the article rows of the board inside the element with id "main-area" and
        /// converts each row into a <c>MonitoringItem</c>. Rows that lack the expected
        /// td / div / anchor structure are skipped.
        /// </summary>
        /// <param name="doc">Parsed board page.</param>
        /// <returns>The items found; empty (never null) when the board cannot be located.</returns>
        private List <MonitoringItem> ParsingTitleType(HtmlAgilityPack.HtmlDocument doc)
        {
            List <MonitoringItem> lstItem = new List <MonitoringItem>();

            HtmlAgilityPack.HtmlNode mainArea = doc.GetElementbyId("main-area");
            if (mainArea == null)
            {
                FuncLog($"could not find main-area");
                return(lstItem);
            }

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so every result is checked before LINQ is applied to it.
            var mainDivs        = mainArea.SelectNodes("div");
            var divArticleBoard = mainDivs == null
                                  ? null
                                  : mainDivs.FirstOrDefault(x => CommonHelper.HasClass(x, "article-board", "m-tcol-c") && x.Id != "upperArticleList");
            if (divArticleBoard == null)
            {
                FuncLog($"could not find article area");
                return(lstItem);
            }

            var tableBody = CommonHelper.GetSingleNode(divArticleBoard, "table", "tbody");
            var rows      = tableBody == null ? null : tableBody.SelectNodes("tr");
            if (rows == null)
            {
                return(lstItem);
            }

            foreach (var trArticle in rows)
            {
                var cells         = trArticle.SelectNodes("td");
                var tdArticleWrap = cells == null ? null : cells.FirstOrDefault(x => CommonHelper.HasClass(x, "td_article"));
                if (tdArticleWrap == null)
                {
                    continue;
                }

                var innerDivs  = tdArticleWrap.SelectNodes("div");
                var divArticle = innerDivs == null ? null : innerDivs.FirstOrDefault(x => CommonHelper.HasClass(x, "board-list", "inner_list"));
                if (divArticle == null)
                {
                    continue;
                }

                var anchor = divArticle.SelectSingleNode("a");
                if (anchor == null)
                {
                    continue;
                }

                string articleUrl   = anchor.GetAttributeValue("href", "");
                string articleId    = "";
                string articleTitle = anchor.InnerText.Trim();

                // Only the part after '?' holds parameters; without this the first
                // parameter's key would still carry the path ("/path?articleid").
                int    queryStart = articleUrl.IndexOf('?');
                string query      = queryStart >= 0 ? articleUrl.Substring(queryStart + 1) : articleUrl;

                foreach (var param in query.Split('&'))
                {
                    string[] arrParam = param.Split('=');
                    string   paramKey = arrParam[0];

                    if (paramKey == "articleid" && arrParam.Length == 2)
                    {
                        articleId = arrParam[1];
                        break;
                    }
                }

                lstItem.Add(new MonitoringItem()
                {
                    ItemId        = articleId,
                    ItemTitle     = articleTitle,
                    ItemUrlPc     = string.Format(ItemDetailBaseUrlDesktop, articleId),
                    ItemUrlMobile = string.Format(ItemDetailBaseUrlMobile, articleId),
                    ItemDate      = DateTime.Now
                });
            }

            return(lstItem);
        }
예제 #26
0
        /// <summary>
        /// Builds a <c>Product</c> from one catalogue item node: price, image, manufacturer,
        /// name, product link and availability are each taken from the first node matching
        /// a fixed XPath relative to <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Item node to read from.</param>
        /// <returns>The product assembled from the node's contents.</returns>
        private static Product processProduct(HtmlAgilityPack.HtmlNode node)
        {
            // Each lookup takes the first match; First() throws when nothing matches.
            HtmlAgilityPack.HtmlNode priceElement        = node.SelectNodes("*/*/*/span[@class='price__value']").First();
            HtmlAgilityPack.HtmlNode imageElement        = node.SelectNodes("*/*/img[@class='item__image']").First();
            HtmlAgilityPack.HtmlNode manufacturerElement = node.SelectNodes("*/div[@class='item__mnf']").First();
            HtmlAgilityPack.HtmlNode nameElement         = node.SelectNodes("*/div[@class='item__name']/*").First();
            HtmlAgilityPack.HtmlNode linkElement         = node.SelectNodes("a[@class='link link_no-underline']").First();
            HtmlAgilityPack.HtmlNode availabilityElement = node.SelectNodes("div[@class='item__avail']/div").First();

            // Two availability labels are remapped: one to an empty string, one to null.
            string availability = availabilityElement.InnerText;
            switch (availability)
            {
                case "Со склада":
                    availability = "";
                    break;

                case "По запросу":
                    availability = null;
                    break;
            }

            // Spaces are stripped from the price text before conversion.
            var price       = Convert.ToDecimal(priceElement.InnerText.Replace(" ", ""), format);
            var imageUrl    = imageElement.Attributes["src"].Value;
            var productUrl  = site + linkElement.Attributes["href"].Value;
            var productName = nameElement.InnerText;

            var attributes = new Dictionary <string, string>
            {
                { "Производитель", manufacturerElement.InnerText }
            };

            return(new Product(productName, price, availability, attributes, productUrl, imageUrl));
        }
예제 #27
0
        /// <summary>
        /// Collects the hidden input fields below <paramref name="node"/> as name/value pairs.
        /// Only inputs with <c>type='hidden'</c> that carry both a name and a value attribute
        /// are considered. When a name occurs more than once, the last value wins.
        /// </summary>
        /// <param name="node">Node (typically a form) to search.</param>
        /// <returns>The hidden field values keyed by field name; empty when none exist.</returns>
        public static Dictionary <string, string> FormParams(HtmlAgilityPack.HtmlNode node)
        {
            Dictionary <string, string> dicpara = new Dictionary <string, string>();

            // SelectNodes yields null (not an empty collection) when nothing matches.
            HtmlAgilityPack.HtmlNodeCollection InputTypeNodeList = node.SelectNodes(".//input[@type='hidden' and @name and @value]");
            if (InputTypeNodeList == null)
            {
                return(dicpara);
            }

            foreach (HtmlAgilityPack.HtmlNode hidenode in InputTypeNodeList)
            {
                string key   = hidenode.Attributes["name"].Value;
                string value = hidenode.Attributes["value"].Value;

                // Indexer instead of Add: repeated field names must not throw.
                dicpara[key] = value;
            }
            return(dicpara);
        }
예제 #28
0
        /// <summary>
        /// Parses a forum lobby page: determines the server time from the last centered
        /// 'smallfont' div and appends every thread row that <c>HtmlToThread</c> can convert
        /// to <paramref name="threadList"/>. Thread URLs not starting with "http" are
        /// prefixed with the part of <paramref name="url"/> up to and including its last '/'.
        /// </summary>
        /// <param name="url">Address the page was loaded from.</param>
        /// <param name="doc">Raw HTML of the page.</param>
        /// <param name="serverTime">Parsed page time; the local clock when no time node exists.</param>
        /// <param name="threadList">List that receives the parsed threads.</param>
        protected virtual void ParseLobbyPage(string url, string doc, out DateTimeOffset serverTime, ref List <ForumThread> threadList)
        {
            Int32 threadId = VBulletinForum.ThreadIdFromUrl(url);
            var   html     = new HtmlAgilityPack.HtmlDocument();

            html.LoadHtml(doc);
            HtmlAgilityPack.HtmlNode root = html.DocumentNode;

            serverTime = DateTime.Now;
            //(//div[class="smallfont", align="center'])[last()] All times are GMT ... The time is now <span class="time">time</span>"."

            // SelectNodes yields null when nothing matches; calling Last() on that would
            // throw instead of falling back to the default time set above.
            HtmlAgilityPack.HtmlNodeCollection timeNodes = root.SelectNodes("//div[@class='smallfont'][@align='center']");
            if (timeNodes != null)
            {
                HtmlAgilityPack.HtmlNode timeNode = timeNodes.LastOrDefault();
                if (timeNode != null)
                {
                    String timeText = timeNode.InnerText;
                    serverTime = Utils.Misc.ParsePageTime(timeText, DateTime.UtcNow);
                }
            }

            HtmlAgilityPack.HtmlNodeCollection threads = root.SelectNodes("//tbody[contains(@id, 'threadbits_forum_')]/tr");
            if (threads == null)
            {
                return;
            }
            String urlBase = url.Substring(0, url.LastIndexOf('/') + 1);

            foreach (HtmlAgilityPack.HtmlNode thread in threads)
            {
                ForumThread t = HtmlToThread(threadId, thread, serverTime);
                if (t != null)
                {
                    if (!t.URL.StartsWith("http"))
                    {
                        t.URL = urlBase + t.URL;
                    }
                    threadList.Add(t);
                }
            }
        }
예제 #29
0
        /// <summary>
        /// Downloads the ynet page and converts the rows of the first table found at a
        /// fixed XPath into events. Rows whose text is blank or contains "מבזקים" are ignored.
        /// </summary>
        /// <returns>
        /// The parsed events; null when the table cannot be located; an empty list when the
        /// table has no rows.
        /// </returns>
        public static List <Event> GetDataScreenScrap()
        {
            string body = Utils.GetBody("http://www.ynet.co.il/home/0,7340,L-184,00.html");

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(body);
            HtmlAgilityPack.HtmlNode           root  = doc.DocumentNode;
            HtmlAgilityPack.HtmlNodeCollection nodes = root.SelectNodes("/html[1]/body/div[4]/table[3]/tr/td[2]/table[2]/tr/td/table/tr/td/table");
            if (nodes == null || nodes.Count == 0)
            {
                return(null);
            }
            HtmlAgilityPack.HtmlNode table = nodes.ElementAt(0);

            // SelectNodes yields null (not an empty collection) when the table has no rows.
            HtmlAgilityPack.HtmlNodeCollection rows = table.SelectNodes("tr");
            if (rows == null)
            {
                return(new List <Event>());
            }

            return
                ((from t in rows
                  where !string.IsNullOrEmpty(t.InnerText.Trim()) && !t.InnerText.Contains("מבזקים")
                  select Event.FromHtmlNode(t)).ToList());
        }
예제 #30
0
        /// <summary>
        /// Collects the page links listed in the chapter's page drop-down into
        /// <paramref name="pages"/> (page number to URL). When at least one new link was
        /// added, the page with the second-highest number is loaded and scanned in turn.
        /// </summary>
        /// <param name="document">Parsed page to read the drop-down from.</param>
        /// <param name="pages">Accumulator shared across the recursion.</param>
        /// <exception cref="ParserException">The drop-down links are not present in the document.</exception>
        private void GetPagesRecursive(HtmlAgilityPack.HtmlNode document, IDictionary <int, string> pages)
        {
            var links = document.SelectNodes(@"//div[@class=""main-body""]//div[@class=""btn-group""][2]/ul[@class=""dropdown-menu""]/li/a");
            if (links == null)
            {
                throw new ParserException("Could not find expected elements on website.", document.InnerHtml);
            }

            bool addedAny = false;

            foreach (var link in links)
            {
                var target = GetFullUrl(link.Attributes["href"].Value);

                // skip duplicate urls
                bool alreadyKnown = pages.Any(entry => entry.Value == target);
                if (alreadyKnown)
                {
                    continue;
                }

                // On a parse failure the number stays 0 and the entry is still added.
                int  number;
                bool parsed = Int32.TryParse(Regex.Match(link.InnerText, @"\d+").Value, out number);
                if (!parsed)
                {
                    _log.Error("Unable to parse page number '" + link.InnerText + "'");
                }

                // if page is already in dictionary use random number instead
                if (pages.ContainsKey(number))
                {
                    number = Random;
                }

                pages.Add(number, target);
                addedAny = true;
            }

            if (!addedAny)
            {
                return;
            }

            // Continue from the entry with the second-highest key; stop when there is none.
            var followUp = pages.OrderByDescending(entry => entry.Key).Skip(1).FirstOrDefault();
            if (followUp.Equals(default(KeyValuePair <int, string>)))
            {
                return;
            }

            GetPagesRecursive(WebHelper.GetHtmlDocument(followUp.Value), pages);
        }