/// <summary>
/// Verifies that adding a node to an <c>HtmlNodeCollection</c> assigns the collection's
/// owner as the node's parent and returns the index of the added node.
/// </summary>
public void AddTest()
{
    HtmlElement root = new HtmlElement("root");
    HtmlNodeCollection target = new HtmlNodeCollection(root);
    HtmlElement child = new HtmlElement("child");

    int index = target.Add(child);

    Assert.AreEqual(root, child.Parent);
    // Expected value first, actual second, so a failure message reads correctly.
    Assert.AreEqual(0, index);

    // No assertion follows: this call only checks that adding null does not throw.
    target.Add(null);
}
/// <summary>
/// Recursively walks <paramref name="nodes"/> and writes every anchor element that
/// carries an <c>href</c> attribute to the console.
/// </summary>
/// <param name="nodes">Nodes to inspect; entries that are not <c>HtmlElement</c> instances are skipped.</param>
static void ListAllHyperlinks(HtmlNodeCollection nodes)
{
    foreach (HtmlNode node in nodes)
    {
        HtmlElement element = node as HtmlElement;
        if (element == null)
        {
            continue;
        }

        // Tag names are identifiers, so compare ordinally instead of lower-casing
        // with the current culture.
        bool isAnchor = string.Equals(element.Name, "a", System.StringComparison.OrdinalIgnoreCase);
        if (isAnchor && element.Attributes.Contains("href"))
        {
            System.Console.WriteLine(element.ToString());
        }

        // Descend into the children of every element, not only anchors.
        ListAllHyperlinks(element.Nodes);
    }
}
/// <summary>
/// Evaluates an XPath query starting from this node and gathers every node it matches.
/// </summary>
/// <param name="xpath">The XPath expression to evaluate.</param>
/// <returns>
/// An <see cref="HtmlNodeCollection"/> holding the matched nodes, or <c>null</c> when the
/// query matched nothing.
/// </returns>
public HtmlNodeCollection SelectNodes(string xpath)
{
    HtmlNodeCollection matches = new HtmlNodeCollection(null);
    HtmlNodeNavigator navigator = new HtmlNodeNavigator(OwnerDocument, this);

    for (XPathNodeIterator iterator = navigator.Select(xpath); iterator.MoveNext();)
    {
        HtmlNodeNavigator position = (HtmlNodeNavigator)iterator.Current;
        matches.Add(position.CurrentNode);
    }

    // Callers get null rather than an empty collection when nothing matched.
    return matches.Count == 0 ? null : matches;
}
/// <summary>
/// Verifies <c>FindByAttributeName</c> against a small tree: three top-level elements,
/// two of which carry "firstattribute", plus a nested child that later receives it.
/// </summary>
public void FindByAttributeNameTest()
{
    HtmlElement root = new HtmlElement("root");
    HtmlNodeCollection target = new HtmlNodeCollection(root);
    target.Add(new HtmlElement("first"));
    target.Add(new HtmlElement("second"));
    target.Add(new HtmlElement("third"));
    ((HtmlElement)target[0]).Nodes.Add(new HtmlElement("secondchild"));
    ((HtmlElement)target[1]).Attributes.Add(new HtmlAttribute("firstattribute"));
    ((HtmlElement)target[1]).Attributes.Add(new HtmlAttribute("secondattribute"));
    ((HtmlElement)target[2]).Attributes.Add(new HtmlAttribute("firstattribute"));

    // Expected value first, actual second, so failure messages read correctly.
    Assert.AreEqual(2, target.FindByAttributeName("firstattribute").Count);

    // Give the nested child the attribute as well. The boolean argument appears to
    // control whether nested nodes are searched (2 matches with false, 3 with true).
    ((HtmlElement)((HtmlElement)target[0]).Nodes[0]).Attributes.Add(new HtmlAttribute("firstattribute"));
    Assert.AreEqual(2, target.FindByAttributeName("firstattribute", false).Count);
    Assert.AreEqual(3, target.FindByAttributeName("firstattribute", true).Count);
}
/// <summary>
/// Checks that a collection constructed for a parent element assigns that parent
/// to a node added to it.
/// </summary>
public void HtmlNodeCollectionConstructorTest()
{
    HtmlElement parent = new HtmlElement("root");
    HtmlNodeCollection collection = new HtmlNodeCollection(parent);
    HtmlElement addedNode = new HtmlElement("child");

    collection.Add(addedNode);

    Assert.AreEqual(parent, addedNode.Parent);
}
/// <summary>
/// Rebuilds <c>MarkedList</c> by downloading mission pages page by page. The numeric
/// offset is appended to <paramref name="uri"/> and advanced by 50 until a page
/// yields no missions.
/// </summary>
/// <param name="uri">Request URI prefix; the current offset is appended to it.</param>
private async Task LoadMarkedTask(string uri)
{
    MarkedList.Clear();

    // The pattern never changes, so build it once instead of once per page.
    Regex fileIdPattern = new Regex(@"# \d+");
    int offset = 0;

    while (true)
    {
        string responseString = await SendRequest.GET(uri + offset);
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(responseString);

        HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(@"//form[@name='frm_mis']/tr");
        if (nodes == null)
        {
            // SelectNodes can return null when nothing matches. Treat that like an
            // empty page and stop paging instead of failing on nodes.Count.
            break;
        }

        int count = 0;

        // The first and last rows are skipped, exactly as before.
        for (int i = 1; i < nodes.Count - 1; i++)
        {
            HtmlNode row = nodes[i];

            string complete = row.SelectSingleNode(@"./td[1]/table/tr[@class='m2']//a").GetAttributeValue("href", "");
            // Mark extraction is disabled; it previously read the href of ./td[1]/table/tr[2]//a.
            string mark = "";
            string targetIP = row.SelectSingleNode(@"./td[2]/table/tr[1]//a/span[@class='green']").InnerText;
            string type = row.SelectSingleNode(@"./td[3]/a/span").InnerText;
            string detail = StringHelper.RemoveSpecial(row.SelectSingleNode(@"./td[4]//td[@align='justify']").InnerText);
            string reward = row.SelectSingleNode(@"./td[5]").InnerText;

            // The file id is the number following '#' in the detail text, if any.
            string fileID = "";
            Match match = fileIdPattern.Match(detail);
            if (match.Success)
            {
                fileID = match.Value.Replace("#", "").Trim();
            }

            MissionModel newData = new MissionModel()
            {
                Complete = complete,
                Mark = mark,
                TargetIP = targetIP,
                Type = type,
                Details = detail,
                Reward = reward,
                FileID = fileID
            };
            MarkedList.Add(newData);
            count++;
        }

        if (count == 0)
        {
            break;
        }

        offset += 50;
    }
}
/// <summary>
/// Creates a new HTML element with the given tag name, together with fresh
/// child-node and attribute collections that are constructed with this element.
/// </summary>
/// <param name="name">The tag name of the element.</param>
public HtmlElement(string name)
{
    nodes = new HtmlNodeCollection(this);
    attributes = new HtmlAttributeCollection(this);
    // "this." is needed only here, where the parameter shadows the field.
    this.name = name;
}
/// <summary>
/// Helper for GetFriend: walks the paginated friend list that starts at
/// <paramref name="firstFriendPageUrl"/> and collects an identifier for every friend found.
/// </summary>
/// <param name="firstFriendPageUrl">URL of the first friend-list page.</param>
/// <returns>
/// List of Facebook user ids. For profile links that do not use <c>profile.php</c>,
/// the user alias taken from the link is stored instead.
/// </returns>
List<string> GetFriends(string firstFriendPageUrl)
{
    // Collected user ids / aliases.
    var friends = new List<string>();

    // A queue of pages replaces a recursive walk over the pagination links.
    var friendPages = new Queue<string>();
    friendPages.Enqueue(firstFriendPageUrl);

    // Loop until the end of the friend list.
    while (friendPages.Count > 0)
    {
        string currentFriendPageUrl = friendPages.Dequeue();
        string currentHtmlContent = _http.DownloadContent(currentFriendPageUrl);
        HtmlNode docNode = HtmlHelper.BuildDom(currentHtmlContent);

        // The div with id "root" is the content container, not the real document root.
        HtmlNode rootNode = docNode.SelectSingleNode("//div[@id='root']");
        if (rootNode == null)
        {
            // Without the container there is nothing to parse. Return what was
            // collected so far instead of failing on a null reference.
            _logger.WriteLine("Could not find root div in friend page. Addition info : url=" + currentFriendPageUrl);
            return friends;
        }

        // An XPath that starts with "//" is evaluated against the whole document even
        // when it is called on a child node, so it can match more than intended.
        // To confine the search to the root div, its inner HTML is loaded into a
        // separate document. Not the most elegant approach, but it works.
        docNode = HtmlHelper.BuildDom(rootNode.InnerHtml);

        // Search the new document, with and without an explicit tbody.
        HtmlNodeCollection friendAnchors =
            docNode.SelectNodes("//table[@role='presentation']/tr/td[2]/a") ??
            docNode.SelectNodes("//table[@role='presentation']/tbody/tr/td[2]/a");
        if (friendAnchors == null)
        {
            return friends;
        }

        // Try to get the user alias or id from every anchor.
        foreach (HtmlNode friendAnchor in friendAnchors)
        {
            string userProfileHref = friendAnchor.GetAttributeValue("href", null);
            if (userProfileHref == null)
            {
                // Possible causes:
                // - the bot has been blocked by this user or by Facebook;
                // - the user has been deleted;
                // - another pattern is needed to detect the user id.
                // Log it so it can be fixed later.
                _logger.WriteLine("Maybe " + friendAnchor.InnerText + " has been banned. Access this link from browser to check again.");
                continue;
            }

            if (!userProfileHref.Contains("profile.php"))
            {
                // Without "profile.php" the href carries the user alias.
                // E.g : https://m.facebook.com:443/user.alias.here?fref=fr_tab&refid=17/about
                // Drop the first character and everything from the '?' onwards.
                int questionMarkIndex = userProfileHref.IndexOf("?");
                if (questionMarkIndex > -1)
                {
                    userProfileHref = userProfileHref.Substring(1, questionMarkIndex - 1);
                }
                else
                {
                    userProfileHref = userProfileHref.Substring(1);
                }

                friends.Add(userProfileHref);
            }
            else
            {
                // Extract the user id from an href such as profile.php?id=user_id&fre...
                // Log the href when the extraction fails.
                Match match = CompiledRegex.Match("UserId", userProfileHref);
                if (match.Success)
                {
                    friends.Add(match.Groups["id"].Value);
                }
                else
                {
                    _logger.WriteLine("Match user id by CompiledRege.Match(UserId) is fail. Addition info : url=" + firstFriendPageUrl + " and user profile is " + userProfileHref);
                }
            }
        }

        // Queue the next page if there is a "more friends" link.
        HtmlNode moreFriend = rootNode.SelectSingleNode("//div[@id='m_more_friends']/a");
        if (moreFriend == null)
        {
            continue;
        }

        var nextUrl = WebUtility.HtmlDecode(moreFriend.GetAttributeValue("href", null));
        if (nextUrl != null)
        {
            friendPages.Enqueue("https://m.facebook.com" + nextUrl);
        }
        else
        {
            _logger.WriteLine("This is last page.");
        }
    }

    return friends;
}
/// <summary>
/// Follows every listing link found on a result page, loads the linked property page,
/// extracts its fields into <c>ImmobilienProperties</c> and stores the result through
/// <c>DbManagement.UpdateOrInsert</c>.
/// </summary>
/// <param name="homePage">Site prefix that is prepended to links which do not already contain it.</param>
/// <param name="htmlDoc">The already loaded result page.</param>
private void ParseAndInsertIntoList(string homePage, HtmlDocument htmlDoc)
{
    HtmlNodeCollection nodeCollectionLinks = htmlDoc.DocumentNode.SelectNodes("//article/div/div/div/div/a");
    if (nodeCollectionLinks is null)
    {
        // SelectNodes can return null when nothing matches; a page without listings
        // is simply skipped instead of throwing outside the try block.
        return;
    }

    foreach (HtmlNode link in nodeCollectionLinks)
    {
        ImmobilienProperties = new Dictionary<string, string>();

        string propertiesLink = link.Attributes["href"].Value;
        if (!propertiesLink.Contains(homePage))
        {
            propertiesLink = homePage + propertiesLink;
        }

        ImmobilienProperties["link"] = propertiesLink;
        HtmlWeb propertyWeb = new HtmlWeb();

        try
        {
            HtmlDocument htmlPropertyDoc = propertyWeb.Load(propertiesLink);
            ImmobilienProperties["html"] = htmlPropertyDoc.DocumentNode.InnerHtml;

            // Title
            HtmlNodeCollection titles = htmlPropertyDoc.DocumentNode.SelectNodes("//div/div/div/div/div/h1");
            ImmobilienProperties[Constants.Db.title] = titles[0].InnerText;

            HtmlNodeCollection types = htmlPropertyDoc.DocumentNode.SelectNodes("//div/div/div/div/div/div/div/div/dl");
            HtmlNodeCollection address = htmlPropertyDoc.DocumentNode.SelectNodes("//div/div/div/div/div/span/div");

            // Address: all matching fragments joined with spaces. A missing address
            // block still throws here and the listing is skipped by the catch below.
            string strAddress = " ";
            foreach (HtmlNode ndAddress in address)
            {
                strAddress = strAddress + " " + ndAddress.InnerText;
            }

            ImmobilienProperties[Constants.Db.address] = strAddress;

            if (!(types is null))
            {
                foreach (HtmlNode type in types)
                {
                    // Floor (Etage).
                    // NOTE(review): this writes to Constants.Db.typ, the same key as the
                    // "Typ" call below - confirm whether a dedicated key was intended.
                    ParseHtml(type, Constants.Html.etage, Constants.Db.typ, new int[] { 2 }, ImmobilienProperties);
                    // Type (Typ)
                    ParseHtml(type, Constants.Html.typ, Constants.Db.typ, new int[] { 2 }, ImmobilienProperties);
                    // Living area (Wohnflaeche)
                    ParseHtml(type, Constants.Html.wohnflaeche, Constants.Db.wohnflaeche, new int[] { 3 }, ImmobilienProperties);
                    // Available from (Bezugsfrei)
                    ParseHtml(type, Constants.Html.bezugsfrei, Constants.Db.bezugsfrei, new int[] { 3 }, ImmobilienProperties);
                    // Credit report (Bonitaetsauskunft)
                    ParseHtml(type, Constants.Html.bonitaetsauskunft, Constants.Db.bonitaetsauskunft, new int[] { 2 }, ImmobilienProperties);
                    // Rooms (Zimmer)
                    ParseHtml(type, Constants.Html.zimmer, Constants.Db.zimmer, new int[] { 2 }, ImmobilienProperties);
                    // Base rent (Kaltmiete)
                    ParseHtml(type, Constants.Html.kaltmiete, Constants.Db.kaltmiete, new int[] { 2 }, ImmobilienProperties);
                    // Additional costs (Nebenkosten)
                    ParseHtml(type, Constants.Html.nebenkosten, Constants.Db.nebenkosten, new int[] { 3 }, ImmobilienProperties);
                    // Total rent (Gesamtmiete)
                    ParseHtml(type, Constants.Html.gesamtmiete, Constants.Db.gesamtmiete, new int[] { 2 }, ImmobilienProperties);
                }
            }

            DbManagement dbManagement = new DbManagement();
            dbManagement.UpdateOrInsert(ImmobilienProperties);
        }
        catch (Exception e)
        {
            // One broken listing must not stop the rest of the page.
            Console.WriteLine(DateTime.Now + " Link: " + propertiesLink + ", Exception: " + e.Message);
        }
    }
}
/// <summary>
/// Builds a document by passing <paramref name="reader"/> to <c>HtmlParser.Parse</c>
/// and keeping the result as this document's nodes.
/// </summary>
/// <param name="reader">Reader that supplies the HTML text.</param>
private HtmlDocument(StreamReader reader)
{
    nodes = HtmlParser.Parse(reader);
}
/// <summary>
/// Scrapes the Aros home page for the "Bestsellery" table and builds a <c>Book</c>
/// for each listed row whose markup contains "autor".
/// </summary>
/// <returns>
/// The books that could be parsed. The list is empty when the home page could not be
/// downloaded or parsed; rows that fail are logged and skipped.
/// </returns>
public override async Task<List<Book>> GetBestsellersAsync()
{
    var bookList = new List<Book>();
    string url = "https://aros.pl/";
    string htmlCode = "";

    using (WebClient client = new WebClient() { Encoding = System.Text.Encoding.UTF8 })
    {
        try
        {
            // Real async download instead of a blocking call wrapped in Task.Run.
            htmlCode = await client.DownloadStringTaskAsync(url);
        }
        catch (Exception ex)
        {
            Log.Error($"Network Error in Aros: {ex.Message}");
        }
    }

    if (string.IsNullOrEmpty(htmlCode))
    {
        return bookList;
    }

    var htmlDoc = new HtmlDocument();
    HtmlNodeCollection bestList = null;
    try
    {
        await Task.Run(() => { htmlDoc.LoadHtml(htmlCode); });

        // Find the <b>Bestsellery</b> caption, then re-parse the markup of the element
        // two levels above it to get its rows.
        var bestSel = htmlDoc.DocumentNode.SelectNodes("//b").FirstOrDefault(x => x.InnerText == "Bestsellery");
        var bestListhtml = bestSel.ParentNode.ParentNode.InnerHtml;
        htmlDoc.LoadHtml(bestListhtml);
        bestList = htmlDoc.DocumentNode.SelectNodes("//tr");
    }
    catch (Exception ex)
    {
        Log.Error($"Html Error in Aros: {ex.Message}");
        Log.Error(ex.StackTrace);
    }

    if (bestList == null)
    {
        return bookList;
    }

    foreach (var bestNode in bestList)
    {
        try
        {
            var htmlDocSingle = new HtmlDocument();
            htmlDocSingle.LoadHtml(bestNode.InnerHtml);
            var href = htmlDocSingle.DocumentNode.SelectSingleNode("//a");
            var urlAddress = "https://www.aros.pl" + href.Attributes["href"].Value;

            if (!bestNode.InnerHtml.Contains("autor"))
            {
                continue;
            }

            string htmlBook;
            using (WebClient client = new WebClient())
            {
                // Download raw bytes and decode explicitly as UTF-8, without blocking.
                var htmlData = await client.DownloadDataTaskAsync(urlAddress);
                htmlBook = Encoding.UTF8.GetString(htmlData);
            }

            var htmlDocBook = new HtmlDocument();
            htmlDocBook.LoadHtml(htmlBook);
            var titleNode = htmlDocBook.DocumentNode.SelectSingleNode("//h1");
            var title = titleNode.InnerHtml.Trim();

            // Author and cover are looked up in the element three levels above the title.
            var mainNode = titleNode.ParentNode.ParentNode.ParentNode;
            var authorDoc = new HtmlDocument();
            authorDoc.LoadHtml(mainNode.InnerHtml);
            var author = authorDoc.DocumentNode.SelectSingleNode("//b").InnerText;

            // Images without an alt attribute are skipped instead of throwing, so an
            // unrelated <img> no longer makes the whole book fail.
            var imgSrc = authorDoc.DocumentNode.SelectNodes("//img")
                         .FirstOrDefault(x => x.Attributes["alt"]?.Value == title).Attributes["src"].Value;
            imgSrc = "https:" + imgSrc;

            var book = new Book(title, author, imgSrc, "Aros");
            await book.SetSizeAsync();
            bookList.Add(book);
        }
        catch (Exception e)
        {
            // A single broken row is logged and skipped; the other rows continue.
            Log.Error($"BOOK error in Aros: {e.Message}");
            Log.Error(e.StackTrace);
        }
    }

    return bookList;
}
/// <summary>
/// Extracts the posts on one forum page. Each <c>div</c> whose id matches <c>^p[0-9]</c>
/// is treated as a post; posts written by a moderator are ignored.
/// </summary>
/// <param name="doc">The parsed forum page.</param>
/// <param name="players">Known players, used to resolve the post author.</param>
/// <param name="replacements">Replacement records passed to the author lookups and to vote extraction.</param>
/// <param name="moderatorNames">Names whose posts are skipped.</param>
/// <returns>
/// The posts whose vote extraction succeeded. Any unexpected failure stops processing
/// of the page and returns the posts collected so far.
/// </returns>
public static List<Post> RunScrubOnADoc(HtmlDocument doc, List<Player> players, List<Replacement> replacements, List<string> moderatorNames)
{
    List<Post> postsOnThisPage = new List<Post>();
    try
    {
        HtmlNodeCollection divs = doc.DocumentNode.SelectNodes("//div");
        Regex reg = new Regex("^p[0-9]", RegexOptions.IgnoreCase);

        foreach (HtmlNode div in divs)
        {
            string id = div.GetAttributeValue("id", string.Empty);
            if (!reg.IsMatch(id))
            {
                continue;
            }

            HtmlNode postProfile = div.SelectSingleNode(".//dl[contains(@class,'postprofile')]");
            string postAuthor = postProfile.SelectSingleNode(".//a").InnerHtml;

            // Votes cast by a moderator are not counted.
            bool wasPostByModerator = false;
            foreach (string moderatorName in moderatorNames)
            {
                if (Player.makeNameFriendly(postAuthor).Equals(Player.makeNameFriendly(moderatorName)))
                {
                    wasPostByModerator = true;
                    break;
                }
            }

            if (wasPostByModerator)
            {
                continue;
            }

            HtmlNode postbody = div.SelectSingleNode(".//div[contains(@class,'postbody')]");
            int postNumber = Int32.Parse(postbody.SelectSingleNode(".//strong").InnerHtml.Replace("#", ""));

            // Resolve the author with progressively looser lookups.
            Player playerVoting = Player.FindPlayerByNameUserAid(players, postAuthor);
            if (playerVoting == null)
            {
                // The result of this lookup used to be discarded, so the replacement
                // search never had any effect.
                playerVoting = Player.FindPlayerByNameUserAidReplacementsLoop(players, postAuthor, replacements);
            }

            if (playerVoting == null)
            {
                playerVoting = PlayerNameWordSearch(players, replacements, Player.makeNameFriendly(postAuthor));
            }

            // Last resort, needed because of UTF-8 problems: encoding/decoding the byte
            // array can mangle names so that none of the lookups above match.
            if (playerVoting == null)
            {
                playerVoting = players.returnClosestPlayerLevensheinDistance(postAuthor);
            }

            // Build a BBCode link to the post from the anchor in the author line.
            HtmlNode authorNode = postbody.SelectSingleNode(".//p[contains(@class,'author')]");
            HtmlNode firstURL = authorNode.SelectSingleNode(".//a");
            string hrefValue = firstURL.Attributes["href"].Value;
            string directPostLocation = hrefValue.Substring(hrefValue.IndexOf("#")).Replace("#p", "#");
            string bbCode = "[url=https://forum.mafiascum.net/viewtopic.php?p=" + directPostLocation.Replace("#", "") + directPostLocation + "]" + postNumber + "[/url]";

            // Strip the fixed prefix of the author line; what remains is the timestamp.
            string dateTextStart = authorNode.InnerText;
            dateTextStart = dateTextStart.Replace("Post #" + postNumber + " \n\t\t\t\t \n\t\t\t\t\t(ISO) \n\t\t\t\t \n\t\t\t » ", "").Trim();
            DateTime timeOfPost;
            bool isValidDate = DateTime.TryParse(dateTextStart, out timeOfPost);
            if (!isValidDate)
            {
                throw new ArgumentException("Bad date provided for post: " + postNumber);
            }

            Post post = null;
            if (playerVoting != null)
            {
                post = new Post(postNumber, playerVoting, timeOfPost, bbCode);
            }

            // Remove quoted text so that votes inside quotes are not counted.
            HtmlNode content = postbody.SelectSingleNode(".//div[contains(@class,'content')]");
            var quotes = content.SelectNodes(".//blockquote");
            HtmlNode contentMinusQuotes = content;
            if (quotes != null)
            {
                foreach (var quote in quotes)
                {
                    try
                    {
                        quote.Remove();
                    }
                    catch
                    {
                        // Best effort: a quote that cannot be removed is left in place.
                    }
                }
            }

            // Remove spoiler headers and bodies (quotetitle / quotecontent divs).
            HtmlNodeCollection spoilerTags = contentMinusQuotes.SelectNodes(".//div[contains(@class,'quotetitle')]");
            HtmlNodeCollection spoilerContentTags = contentMinusQuotes.SelectNodes(".//div[contains(@class,'quotecontent')]");
            if (spoilerTags != null)
            {
                try
                {
                    foreach (HtmlNode spoiler in spoilerTags)
                    {
                        spoiler.Remove();
                    }

                    if (spoilerContentTags != null)
                    {
                        foreach (HtmlNode spoilerContent in spoilerContentTags)
                        {
                            spoilerContent.Remove();
                        }
                    }
                }
                catch
                {
                    // Best effort: keep whatever could not be removed.
                }
            }

            if (post == null)
            {
                // No player could be matched to the author, so there is no post to
                // record. Handled explicitly instead of through a NullReferenceException.
                UnityEngine.Debug.Log("Unable to get original player posting. Check for typos in settings.");
                continue;
            }

            try
            {
                post.ExtractVoteFromPost(players, contentMinusQuotes, replacements);
                postsOnThisPage.Add(post);
            }
            catch (Exception)
            {
                UnityEngine.Debug.Log("Some other error occurred in getting the vote. ");
            }
        }
    }
    catch (Exception)
    {
        // Any other failure (missing nodes, bad date, ...) ends processing of this page.
        UnityEngine.Debug.Log("Major unknown error occurred. Run me in debug. ");
    }

    return postsOnThisPage;
}
/// <summary>
/// Downloads a forum page and returns the inner HTML of the spoiler whose title equals
/// <c>VOTE_COUNT_SPOILER_NAME</c>, taken from the first post-body found on the page.
/// </summary>
/// <param name="url">Query-string part of the thread URL (appended after "viewtopic.php?").</param>
/// <param name="postNumberInput">Value used for the "start" query parameter.</param>
/// <returns>The trimmed inner HTML of the first <c>div</c> inside the matching spoiler body.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown for every failure, as before. The message now carries the specific reason;
/// unexpected errors are wrapped and attached as the inner exception.
/// </exception>
public static string ScrubPostForSettings(string url, int postNumberInput)
{
    try
    {
        url = "https://forum.mafiascum.net/viewtopic.php?" + url + "&start=" + postNumberInput;
        HtmlDocument doc = URLReadLogic.URLReadLogic.GetHTMLDocumentFromURL(url);

        HtmlNode div = null;
        if (doc != null && doc.DocumentNode != null)
        {
            div = doc.DocumentNode.SelectSingleNode("//div");
        }

        if (div == null)
        {
            // The one-argument constructor takes a parameter NAME, not a message, so
            // the two-argument overload is used to put the text into Message.
            throw new ArgumentNullException(null, "Could not connect with mafiascum. Try again later.");
        }

        HtmlNode postbody = div.SelectSingleNode(".//div[contains(@class,'postbody')]");
        HtmlNode content = postbody.SelectSingleNode(".//div[contains(@class,'content')]");
        HtmlNodeCollection spoilerTags = content.SelectNodes(".//div[contains(@class,'quotetitle')]");
        HtmlNodeCollection spoilerContentTags = content.SelectNodes(".//div[contains(@class,'quotecontent')]");

        if (spoilerTags == null)
        {
            throw new ArgumentNullException(null, "No spoiler tags in given post. Please verify URL and post number.");
        }

        // Find the position of the spoiler whose bold title matches the settings name.
        // The body with the same position in spoilerContentTags holds the settings.
        int i = 0;
        foreach (HtmlNode spoilerNode in spoilerTags)
        {
            HtmlNode spoilerHeaderTextNode = spoilerNode.SelectSingleNode(".//b");
            if (spoilerHeaderTextNode != null && spoilerHeaderTextNode.InnerText.Trim().Equals(VOTE_COUNT_SPOILER_NAME))
            {
                break;
            }

            i++;
        }

        if (spoilerContentTags != null && i < spoilerContentTags.Count)
        {
            return spoilerContentTags[i].SelectSingleNode(".//div").InnerHtml.Trim();
        }

        throw new ArgumentNullException(null, "Could not find vote counter settings. Check spoiler tag name.");
    }
    catch (ArgumentNullException)
    {
        // Already carries a specific message; do not overwrite it.
        throw;
    }
    catch (Exception e)
    {
        // Keep the exception type callers already catch, but preserve the cause.
        throw new ArgumentNullException("Could not load settings data. Check input.", e);
    }
}
/// <summary>
/// Parses the bet cells of a match page and merges the resulting bets into <c>BetList</c>.
/// Every <c>td</c> carrying a <c>data-sel</c> attribute is examined; its value is split on
/// fixed markers to obtain the coefficient, the selection name and the market name,
/// and the market name decides whether a result, total or handicap bet is created.
/// </summary>
/// <param name="doc">The parsed match page.</param>
/// <param name="url">URL of the match page; stored on each created bet.</param>
public override void ParseMatchPageHtml(HtmlDocument doc, string url)
{
    Sport sport = GetSport(doc);
    if (sport == Sport.NotSupported) { return; }
    MatchName matchName = GetFullMatchName(doc); // full name (only for tennis, because all event names were written like "(Coppejans K.)")
    if (matchName == null) { return; }
    string BetUrl = url;
    HtmlNodeCollection betsNodes = doc.DocumentNode.SelectNodes("//td");
    Bet result = null;
    if (betsNodes == null) { return; }
    foreach (var node in betsNodes)
    {
        result = null;
        // Disabled filter: if (node.Attributes["data-market-type"] != null) continue;
        HtmlAttribute attribute = node.Attributes["data-sel"];
        if (attribute == null) { continue; }
        // NOTE(review): the second Replace argument below looks garbled (three bare
        // quote characters, possibly an HTML-decoded "&quot;") - confirm against the
        // original file; left untouched here.
        string value = attribute.Value.Replace("\"", string.Empty).Replace(""", string.Empty);
        // JavaScript snippet that locates this cell by its XPath and clicks it.
        JavaSelectCode = "(function() { var element = document.evaluate( '" + node.XPath + "' ,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue; element.click(); })();";
        // The text between ",epr:" and ",prices:{" is the coefficient.
        string coeff = value.Split(new string[] { ",epr:", ",prices:{" }, StringSplitOptions.RemoveEmptyEntries)[1];
        // NOTE(review): replacing '.' with ',' presumably relies on a current culture
        // whose decimal separator is a comma - confirm.
        double Probability = Convert.ToDouble(coeff.Replace(".", ","));
        // Selection name (between "sn:" and ",mn:") and market name (between "mn:" and ",ewc:").
        string type = value.Split(new string[] { "sn:", ",mn:" }, StringSplitOptions.RemoveEmptyEntries)[1];
        string TotalorHand = value.Split(new string[] { "mn:", ",ewc:" }, StringSplitOptions.RemoveEmptyEntries)[1];
        // Markets containing " + " or "min." are skipped.
        if (TotalorHand.Contains(" + ")) { continue; }
        if (TotalorHand.ToLower().Contains("min.")) { continue; }
        Team team = GetTeam(TotalorHand, matchName);
        Time time = GetTime(TotalorHand);
        #region main bets
        // Note: && binds tighter than ||, so the "Handicap" exclusion applies only to
        // the "To Win Match" alternative.
        if (TotalorHand.Contains("Match Result") || TotalorHand == "Result" || TotalorHand.Contains("Match Winner Including All OT") || (TotalorHand.Contains("Result") && TotalorHand.Contains("Set")) || TotalorHand.Contains("Normal Time Result") || TotalorHand.Contains("To Win Match") && !TotalorHand.Contains("Handicap"))
        {
            // Two-way winner markets: a bet is created only when the cell is marked RESULT_2WAY.
            if (TotalorHand.Contains("Match Winner Including All OT"))
            {
                if (type == matchName.FirstTeam)
                {
                    if (node.Attributes["data-market-type"] != null && node.Attributes["data-market-type"].Value == "RESULT_2WAY") { result = new ResultBet(ResultBetType.P1, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
                }
                else if (type == matchName.SecondTeam)
                {
                    if (node.Attributes["data-market-type"] != null && node.Attributes["data-market-type"].Value == "RESULT_2WAY") { result = new ResultBet(ResultBetType.P2, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
                }
            }
            if (TotalorHand.Contains("To Win Match"))
            {
                if (type == matchName.FirstTeam)
                {
                    if (node.Attributes["data-market-type"] != null && node.Attributes["data-market-type"].Value == "RESULT_2WAY") { result = new ResultBet(ResultBetType.P1, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
                }
                else if (type == matchName.SecondTeam)
                {
                    if (node.Attributes["data-market-type"] != null && node.Attributes["data-market-type"].Value == "RESULT_2WAY") { result = new ResultBet(ResultBetType.P2, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
                }
            }
            // Selections named "<team> To Win", "Draw" and the double-chance combinations.
            // For "<team> To Win", RESULT_2WAY cells yield P1/P2, all others First/Second.
            if (type == matchName.FirstTeam + " To Win")
            {
                if (node.Attributes["data-market-type"] != null && node.Attributes["data-market-type"].Value == "RESULT_2WAY") { result = new ResultBet(ResultBetType.P1, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
                else { result = new ResultBet(ResultBetType.First, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
            }
            else if (type == "Draw") { result = new ResultBet(ResultBetType.Draw, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
            else if (type == matchName.SecondTeam + " To Win")
            {
                if (node.Attributes["data-market-type"] != null && node.Attributes["data-market-type"].Value == "RESULT_2WAY") { result = new ResultBet(ResultBetType.P2, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
                else { result = new ResultBet(ResultBetType.Second, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
            }
            else if (type == matchName.FirstTeam + " To Win or Draw") { result = new ResultBet(ResultBetType.FirstOrDraw, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
            else if (type == matchName.FirstTeam + " To Win or " + matchName.SecondTeam + " To Win") { result = new ResultBet(ResultBetType.FirstOrSecond, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
            else if (type == matchName.SecondTeam + " To Win or Draw") { result = new ResultBet(ResultBetType.SecondOrDraw, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker); }
        }
        #endregion
        else
        #region Totals for All Team
        // Totals. For "Asian" markets the selection carries two comma-separated lines,
        // which are averaged; otherwise a single line is parsed. Parse failures are
        // swallowed and simply leave result unset.
        if (TotalorHand.Contains("Total") && !TotalorHand.Contains("Sets") && !TotalorHand.Contains("Innings"))
        {
            if (type.Contains("Under"))
            {
                if (TotalorHand.Contains("Asian"))
                {
                    try
                    {
                        double first_param = Convert.ToDouble(type.Split(new string[] { "Under ", "," }, StringSplitOptions.RemoveEmptyEntries)[0].Replace(".", ","));
                        double second_param = Convert.ToDouble(type.Split(new string[] { "Under ", "," }, StringSplitOptions.RemoveEmptyEntries)[1].Replace(".", ","));
                        double param = (first_param + second_param) / 2;
                        result = new TotalBet(TotalBetType.Under, param, time, team, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker);
                    }
                    catch { }
                }
                else
                {
                    try
                    {
                        double param = Convert.ToDouble(type.Split(new string[] { "Under ", "\0" }, StringSplitOptions.RemoveEmptyEntries)[0].Replace(".", ","));
                        result = new TotalBet(TotalBetType.Under, param, time, team, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker);
                    }
                    catch { }
                }
            }
            if (type.Contains("Over"))
            {
                if (TotalorHand.Contains("Asian"))
                {
                    try
                    {
                        double first_param = Convert.ToDouble(type.Split(new string[] { "Over ", "," }, StringSplitOptions.RemoveEmptyEntries)[0].Replace(".", ","));
                        double second_param = Convert.ToDouble(type.Split(new string[] { "Over ", "," }, StringSplitOptions.RemoveEmptyEntries)[1].Replace(".", ","));
                        double param = (first_param + second_param) / 2;
                        result = new TotalBet(TotalBetType.Over, param, time, team, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker);
                    }
                    catch { }
                }
                else
                {
                    try
                    {
                        double param = Convert.ToDouble(type.Split(new string[] { "Over ", "\0" }, StringSplitOptions.RemoveEmptyEntries)[0].Replace(".", ","));
                        result = new TotalBet(TotalBetType.Over, param, time, team, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker);
                    }
                    catch { }
                }
            }
        }
        #endregion
        else
        #region All Handicaps
        if (TotalorHand.Contains("Handicap") || TotalorHand.Contains("Draw No Bet")) // TODO (author): rework. "Draw No Bet" = 0, everything else is fine; handle it together with handicaps.
        {
            // "Draw No Bet" is recorded as a handicap of 0. For "Asian" markets the two
            // values in the parentheses are averaged (a parse failure leaves param at 0);
            // for plain handicaps a parse failure skips the cell entirely.
            if (type.Contains(matchName.FirstTeam))
            {
                double param = 0;
                string test;
                if (TotalorHand.Contains("Draw No Bet")) { param = 0; }
                else
                {
                    if (TotalorHand.Contains("Asian"))
                    {
                        try
                        {
                            double first_param = Convert.ToDouble(type.Split(new string[] { matchName.FirstTeam + " (", ")", "," }, StringSplitOptions.RemoveEmptyEntries)[0].Replace(".", ","));
                            double second_param = Convert.ToDouble(type.Split(new string[] { matchName.FirstTeam + " (", ")", "," }, StringSplitOptions.RemoveEmptyEntries)[1].Replace(".", ","));
                            param = (first_param + second_param) / 2;
                        }
                        catch { }
                    }
                    else
                    {
                        try
                        {
                            test = type.Split(new string[] { matchName.FirstTeam + " (", ")" }, StringSplitOptions.RemoveEmptyEntries)[0].Replace(".", ",");
                            param = Convert.ToDouble(test);
                        }
                        catch { continue; }
                    }
                }
                result = new HandicapBet(HandicapBetType.F1, param, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker);
            }
            // Not an else-if: when the selection contains both team names, the F2 bet
            // created here overwrites the F1 bet from above.
            if (type.Contains(matchName.SecondTeam))
            {
                double param = 0;
                string test;
                if (TotalorHand.Contains("Draw No Bet")) { param = 0; }
                else
                {
                    if (TotalorHand.Contains("Asian"))
                    {
                        try
                        {
                            double first_param = Convert.ToDouble(type.Split(new string[] { matchName.SecondTeam + " (", ")", "," }, StringSplitOptions.RemoveEmptyEntries)[0].Replace(".", ","));
                            double second_param = Convert.ToDouble(type.Split(new string[] { matchName.SecondTeam + " (", ")", "," }, StringSplitOptions.RemoveEmptyEntries)[1].Replace(".", ","));
                            param = (first_param + second_param) / 2;
                        }
                        catch { }
                    }
                    else
                    {
                        try
                        {
                            test = type.Split(new string[] { matchName.SecondTeam + " (", ")" }, StringSplitOptions.RemoveEmptyEntries)[0].Replace(".", ",");
                            param = Convert.ToDouble(test);
                        }
                        catch { continue; }
                    }
                }
                result = new HandicapBet(HandicapBetType.F2, param, time, Probability, matchName, BetUrl, JavaSelectCode, sport, Maker);
            }
        }
        #endregion
        // Merge: update the odds of a bet already in BetList, otherwise add the new bet.
        if (result != null)
        {
            int index = BetList.IndexOf(result);
            if (index != -1) { BetList[index].ChangeOdds(result.Odds); }
            else { BetList.Add(result); }
        }
    }
    // Blocks the calling thread for 50 ms after each page; the reason is not visible here.
    System.Threading.Thread.Sleep(50);
}
/// <summary>
/// Fetches the detail page of a video and extracts its metadata and play addresses.
/// </summary>
/// <param name="videoId">Id of the video; used to build the detail-page URL.</param>
/// <returns>The populated <c>Video</c>, or <c>null</c> when the page could not be loaded or parsed.</returns>
public Video GetVideoInfo(int videoId)
{
    try
    {
        string videoUrl = baseStr + "?m=vod-detail-id-" + videoId + ".html";
        Video videoInfo = new Video();
        HtmlNode rootnode1 = GetHtmlDoc(videoUrl);

        HtmlNode vodImg = rootnode1.SelectSingleNode("//div[@class='vodImg']");
        HtmlNode vodInfo = rootnode1.SelectSingleNode("//div[@class='vodInfo']");
        HtmlNode vodImgSrc = vodImg.SelectSingleNode("//img[@class='lazy']/@src");
        videoInfo.videoImgUrl = vodImgSrc.Attributes["src"].Value; // poster image URL
        videoInfo.videoName = vodImgSrc.Attributes["alt"].Value;   // video title

        HtmlNode vodInfoVodh = vodInfo.SelectSingleNode("//div[@class='vodh']");
        videoInfo.videoType = vodInfoVodh.SelectSingleNode("span").InnerText;
        videoInfo.videoQuality = vodInfoVodh.SelectSingleNode("label").InnerText;

        // Detail list: each <li> looks like "key:<span>value</span>".
        // Best effort - a failure here leaves videoTotalInfo unset, but the rest of
        // the video is still returned.
        try
        {
            var htmlVodinfobox = vodInfo.SelectNodes("//div[@class='vodinfobox']/ul/li").Where(FuncVodinfobox);
            List<string> tset = (from a in htmlVodinfobox select a.InnerHtml).ToList();
            List<VideoInfo> strResult = new List<VideoInfo>();
            foreach (string item in tset)
            {
                if (string.IsNullOrEmpty(item) || !item.Contains(":"))
                {
                    continue;
                }

                // Split on the FIRST colon only, so values that themselves contain a
                // colon (times, URLs) are no longer truncated.
                string[] items = item.Split(new[] { ':' }, 2);
                HtmlDocument docs = new HtmlDocument();
                docs.LoadHtml(items[1].Trim());

                // Normalise non-breaking spaces, whether they arrive as the entity
                // text or as the decoded U+00A0 character.
                string text = docs.DocumentNode.InnerText.Replace("&nbsp;", " ").Replace("\u00A0", " ").Trim();
                strResult.Add(new VideoInfo { key = items[0].Trim(), value = text });
            }

            videoInfo.videoTotalInfo = strResult;
        }
        catch
        {
            // Intentionally ignored; see the comment above the try block.
        }

        // Synopsis
        videoInfo.videoSynopsis = Compress(rootnode1.SelectSingleNode("//div[@class='vodplayinfo']").InnerText);

        // Play addresses
        videoInfo.playInformation = new List<PlayAddress>();
        HtmlNodeCollection playType_ = rootnode1.SelectNodes("//div[@class='vodplayinfo']/div");
        if (playType_ != null)
        {
            foreach (HtmlNode playTypeOne in playType_)
            {
                // This XPath starts with "//", so it is evaluated against the whole
                // document rather than only below playTypeOne.
                var playType = playTypeOne.SelectNodes("//span[@class='suf']");
                if (playType != null)
                {
                    foreach (HtmlNode item in playType)
                    {
                        PlayAddress routing = new PlayAddress();
                        routing.playType = item.InnerText;
                        routing.videoUrl = new List<VideoUrl>();
                        var Urls = item.SelectNodes("../../ul/li");
                        if (Urls != null)
                        {
                            foreach (var item1 in Urls)
                            {
                                routing.videoUrl.Add(CreateVideoUrl(item1.InnerText));
                            }

                            videoInfo.playInformation.Add(routing);
                        }
                    }

                    if (videoInfo.playInformation.Count > 0)
                    {
                        return videoInfo;
                    }
                }
                else
                {
                    var playTypes = playTypeOne.SelectNodes("h3");
                    if (playTypes != null)
                    {
                        foreach (var item in playTypes)
                        {
                            // The heading looks like "play type:NAME"; keep the part after the colon.
                            PlayAddress routing = new PlayAddress();
                            routing.playType = item.InnerText;
                            if (routing.playType.Contains(":"))
                            {
                                routing.playType = routing.playType.Split(':')[1].Trim();
                            }

                            routing.videoUrl = new List<VideoUrl>();
                            var Urls = item.SelectNodes("../ul/li");
                            if (Urls != null)
                            {
                                foreach (var item1 in Urls)
                                {
                                    routing.videoUrl.Add(CreateVideoUrl(item1.InnerText));
                                }

                                videoInfo.playInformation.Add(routing);
                            }
                        }
                    }
                }
            }
        }

        return videoInfo;
    }
    catch
    {
        // Any failure while loading or parsing the page yields null.
        return null;
    }
}

/// <summary>
/// Builds a <c>VideoUrl</c> from a "name$url" entry. A missing url part becomes " "
/// instead of throwing an <c>IndexOutOfRangeException</c>.
/// </summary>
private static VideoUrl CreateVideoUrl(string entry)
{
    string[] ars = entry.Split('$');
    VideoUrl videourl = new VideoUrl();
    videourl.playName = ars[0];
    videourl.playURL = ars.Length > 1 ? ars[1] : " ";
    return videourl;
}
/// <summary>
/// Handles the page request: reads the stock number from the "Number" request value,
/// scrapes the Yahoo Taiwan quote page for it, stores the values through
/// <c>InsertToSQL</c> / <c>UpdateSQLData</c> and writes the result as JSON to the response.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    try
    {
        string aStcokNumber = Request["Number"].ToString();
        StockData aData = new StockData();

        // Download the Yahoo Kimo stock page; the page is read as Big5.
        HtmlWeb aWeb = new HtmlWeb();
        aWeb.OverrideEncoding = Encoding.GetEncoding("big5");

        // The stock number comes from the request, so encode it before putting it
        // into the query string.
        HtmlDocument doc = aWeb.Load("http://tw.stock.yahoo.com/q/q?s=" + HttpUtility.UrlEncode(aStcokNumber));

        // Load the first-level result table into its own document.
        HtmlDocument docStockContext = new HtmlDocument();
        docStockContext.LoadHtml(doc.DocumentNode.SelectSingleNode("/html[1]/body[1]/center[1]/table[2]/tr[1]/td[1]/table[1]").InnerHtml);

        // Column headers of the quote table.
        HtmlNodeCollection nodeHeaders = docStockContext.DocumentNode.SelectNodes("./tr[1]/th");

        // Values of the quote row, one per line, with spaces removed.
        string[] aStockValues = docStockContext.DocumentNode.SelectSingleNode("./tr[2]").InnerText.Trim().Split('\n');
        for (int i = 0; i < aStockValues.Length; i++)
        {
            aStockValues[i] = aStockValues[i].Replace(" ", "");
        }

        // Collect the non-empty header captions.
        List<string> aTitleData = new List<string>();
        foreach (HtmlNode nodeHeader in nodeHeaders)
        {
            if (nodeHeader.InnerText.Trim() != "")
            {
                aTitleData.Add(HttpUtility.HtmlDecode(nodeHeader.InnerText));
            }
        }

        // Column layout: 0 code, 1 time (not needed), 2 trade price, 3 bid, 4 ask,
        // 5 change, 6 volume (lots), 7 previous close, 8 open, 9 high, 10 low.
        aData.StockValue = aStockValues;
        // Strip the "add to portfolio" link text from the code column. This happens
        // before the SQL calls below, exactly as before.
        aData.StockValue[0] = aData.StockValue[0].Replace("加到投資組合", "");

        // Header captions.
        aData.StockTitle = aTitleData.ToArray();

        if (CheckStockData(aStcokNumber) == false)
        {
            InsertToSQL(aStockValues, aStcokNumber);
        }
        else
        {
            UpdateSQLData(aStockValues, aStcokNumber);
        }

        // News titles and URLs.
        aData.NewsTitle = GetNewsTitle(doc);
        aData.NewsURL = GetNewsURL(doc);

        aData.Result = "0";
        string aJsonStr = JsonConvert.SerializeObject(aData, new JsonSerializerSettings() { StringEscapeHandling = StringEscapeHandling.EscapeNonAscii });
        Response.Write(aJsonStr);
    }
    catch (Exception ex)
    {
        // NOTE(review): this writes the full exception text, including the stack trace,
        // to the client. Kept for compatibility with existing consumers; consider
        // logging it and returning a generic error payload instead.
        Response.Write(ex);
    }
}
/// <summary>
/// Scrapes every vehicle listing on one dealer search-results page, appends the current
/// price to the vehicle's price history, and inserts or updates the vehicle record.
/// </summary>
/// <param name="vehicleListItems">Listing nodes of the page; <c>null</c> is treated as an empty page.</param>
/// <param name="DealerName">Value stored in DEALERSHIP_NAME of each record.</param>
/// <param name="PreviouslyFoundVehicles">
/// Vehicles known from earlier runs. Every VIN seen on this page is removed from the list,
/// so what remains afterwards are vehicles no longer listed. May be <c>null</c>.
/// </param>
/// <param name="Market">Value stored in MARKET of each record.</param>
private static void ProcessVehicleListOnPage(HtmlNodeCollection vehicleListItems, string DealerName, ref List<VEHICLE> PreviouslyFoundVehicles, string Market)
{
    if (vehicleListItems == null)
    {
        return;
    }

    char[] killcomma = { ',' };
    foreach (HtmlNode vehicle in vehicleListItems)
    {
        HtmlNode VehicleNode = vehicle.SelectSingleNode("div[@data-vin]");
        if (VehicleNode == null)
        {
            // No data-vin container: this list item is not a vehicle listing.
            continue;
        }

        string VehicleVIN = VehicleNode.Attributes["data-vin"].Value;
        string VehicleBodyStyle = VehicleNode.Attributes["data-bodyStyle"].Value;
        string VehicleMake = VehicleNode.Attributes["data-make"].Value;
        string VehicleYear = VehicleNode.Attributes["data-year"].Value;
        string VehicleModel = VehicleNode.Attributes["data-model"].Value;
        string VehicleTrim = VehicleNode.Attributes["data-trim"].Value;
        string VehicleExtColor = VehicleNode.Attributes["data-exteriorcolor"].Value;

        VEHICLE ExistingVehicle = VehicleDBManager.GetVehicleByVIN(VehicleVIN);
        bool Existing = ExistingVehicle != null;

        // A known VIN always continues its stored history, whether or not a
        // "previously found" list was supplied; otherwise the stored history would be
        // overwritten with a single new entry.
        List<VehiclePriceHistory> PriceHistory = null;
        if (Existing)
        {
            PriceHistory = RetrieveHistory(ExistingVehicle.VEHICLE_HISTORY);

            // Seen again on the site: take it off the list of vehicles that would
            // otherwise end up being reported as sold.
            if (PreviouslyFoundVehicles != null)
            {
                VEHICLE FoundThisOne = PreviouslyFoundVehicles.SingleOrDefault(v => v.VIN == VehicleVIN);
                if (!ReferenceEquals(null, FoundThisOne))
                {
                    PreviouslyFoundVehicles.Remove(FoundThisOne);
                }
            }
        }
        if (PriceHistory == null)
        {
            PriceHistory = new List<VehiclePriceHistory>();
        }

        // The second child of the list item holds the link, description and pricing areas.
        HtmlNode ListingBody = vehicle.ChildNodes[1];
        HtmlNode LinkNode = ListingBody.SelectSingleNode("div/div[@class='media']/a[@href]");
        string VehicleLink = BaseURI + LinkNode.Attributes["href"].Value;
        HtmlNode DescriptionNode = ListingBody.SelectSingleNode("div/div[@class='description']");
        HtmlNodeCollection DescriptionLists = ListingBody.SelectNodes("div/div[@class='description']/dl");

        string VehicleEngine = string.Empty;
        string VehicleTrans = string.Empty;
        string IntColor = string.Empty;
        string Mileage = string.Empty;
        string StockNumber = string.Empty;
        string ModelCode = string.Empty;
        string VehicleDriveType = string.Empty;

        // SelectNodes returns null when the listing has no <dl> blocks.
        if (DescriptionLists != null)
        {
            foreach (HtmlNode DList in DescriptionLists)
            {
                // Each <dt> names a field and the <dd> that follows carries its value.
                string ItemType = string.Empty;
                foreach (HtmlNode Item in DList.ChildNodes)
                {
                    if (Item.Name == "dt")
                    {
                        ItemType = Item.InnerText;
                        continue;
                    }
                    if (Item.Name != "dd")
                    {
                        continue;
                    }

                    string Data = Item.InnerText.Trim(killcomma);
                    switch (ItemType)
                    {
                        case "Engine:":
                            VehicleEngine = Data;
                            break;
                        case "Transmission:":
                            // Keep the longest transmission description seen.
                            if (VehicleTrans.Length <= Data.Length)
                            {
                                VehicleTrans = Data;
                            }
                            break;
                        case "Exterior Color:":
                            if (Data != VehicleBodyStyle)
                            {
                                VehicleExtColor = Data;
                            }
                            break;
                        case "Interior Color:":
                            IntColor = Data;
                            break;
                        case "Stock #:":
                            StockNumber = Data;
                            break;
                        case "Mileage:":
                            Mileage = Data.Trim();
                            Mileage = Mileage.Replace(",", "");
                            Mileage = Mileage.Replace("miles", "");
                            if (Mileage.Length == 0 || Mileage == " ")
                            {
                                Mileage = "0";
                            }
                            break;
                        case "Model Code:":
                            ModelCode = Data;
                            break;
                        case "Drive Line:":
                            VehicleDriveType = Data;
                            break;
                    }
                    ItemType = string.Empty;
                }
            }
        }

        // Values that may only be available on the vehicle detail page.
        if (Mileage == string.Empty || Mileage == "UNKNOWN" || Mileage == "0")
        {
            Mileage = GetVehicleDetail(VehicleLink, "ddc-span6", "Mileage", "UNKNOWN");
            Mileage = Mileage.Trim();
            Mileage = Mileage.Replace(",", "");
            Mileage = Mileage.Replace("miles", "");
            // NOTE(review): the verbatim literal removes a backslash followed by 'n', not a
            // line break — confirm which one was intended.
            Mileage = Mileage.Replace(@"\n", "");
        }

        if (VehicleDriveType == string.Empty || VehicleDriveType == "UNKNOWN")
        {
            VehicleDriveType = GetVehicleDetail(VehicleLink, "item powertrain", "Drive type", "UNKNOWN");
        }
        if (VehicleDriveType == string.Empty || VehicleDriveType == "UNKNOWN")
        {
            VehicleDriveType = GetVehicleDetail(VehicleLink, "powertrain", "Drive type", "UNKNOWN");
        }

        string WheelSizeInches = GetVehicleDetail(VehicleLink, "item suspension-handling", "Wheel size", "UNKNOWN");
        if (WheelSizeInches == "UNKNOWN")
        {
            WheelSizeInches = GetVehicleDetail(VehicleLink, "suspension-handling", "Wheel size", "UNKNOWN");
        }
        if (WheelSizeInches == "UNKNOWN")
        {
            WheelSizeInches = GetVehicleDetail(VehicleLink, "standard-features", "Wheel Diameter", "UNKNOWN");
        }

        // Price: stays empty when the pricing area or its value span is missing.
        string Price = string.Empty;
        HtmlNode PriceNode = ListingBody.SelectSingleNode("div/div[@class='pricing-area has-buttons']");
        HtmlNode ValueNode = PriceNode == null ? null : PriceNode.SelectSingleNode("ul/li/span/span[@class='value']");
        if (ValueNode != null)
        {
            Price = ValueNode.InnerText;
            Price = Price.Replace("$", "");
            Price = Price.Replace(",", "");
            // A monthly payment is not a sale price.
            if (Price.Contains("/mo") || Price.Contains("month"))
            {
                Price = "0.00";
            }
        }

        VehiclePriceHistory vph = new VehiclePriceHistory();
        vph.VIN = VehicleVIN;
        vph.Date_Recorded = DateTime.Now;
        vph.Price = Price;
        PriceHistory.Add(vph);

        // Carfax link, when the listing has one.
        string CarfaxURL = string.Empty;
        HtmlNode CarfaxNode = DescriptionNode == null ? null : DescriptionNode.SelectSingleNode("div[@class='calloutDetails']/ul/li[@class='carfax']");
        HtmlNode CarfaxLink = CarfaxNode == null ? null : CarfaxNode.SelectSingleNode("a");
        if (CarfaxLink != null && CarfaxLink.Attributes["href"] != null)
        {
            CarfaxURL = CarfaxLink.Attributes["href"].Value;
        }

        // Images
        // List<int> ImageIds = GetandStoreVehicleImages(VehicleLink);
        // var strImageIdListcsv = string.Join(", ", ImageIds);

        // The same field set is written for new and known vehicles; only the final
        // database call differs.
        VEHICLE Record = ExistingVehicle ?? new VEHICLE();
        Record.BODY_STYLE = VehicleBodyStyle;
        Record.CARFAX_URL = CarfaxURL;
        Record.COLOR_EXTERIOR = VehicleExtColor;
        Record.COLOR_INTERIOR = IntColor;
        Record.CURRENT_PRICE = Price;
        Record.DATE_LAST_SEEN = DateTime.Now;
        Record.DEALERSHIP_NAME = DealerName;
        // NOTE(review): VehicleDetailURL is not defined in this method; confirm that it,
        // rather than the local VehicleLink, is the intended value.
        Record.DEALER_DETAIL_URL = VehicleDetailURL;
        Record.DRIVE_TRAIN = VehicleDriveType;
        Record.ENGINE = VehicleEngine;
        Record.MAKE = VehicleMake;
        Record.MARKET = Market;
        //Record.IMAGEIDCSV = strImageIdListcsv;
        Record.MILEAGE = Mileage;
        Record.MODEL = VehicleModel;
        Record.MODEL_CODE = ModelCode;
        Record.STOCK_NUMBER = StockNumber;
        Record.TRANSMISSION = VehicleTrans;
        Record.TRIM = VehicleTrim;
        Record.VEHICLE_HISTORY = SaveHistory(PriceHistory);
        Record.VIN = VehicleVIN;
        Record.WHEEL_SIZE = WheelSizeInches;
        Record.YEAR = VehicleYear;

        if (Existing)
        {
            VehicleDBManager.UpdateVehicleRecord(Record);
        }
        else
        {
            VehicleDBManager.InsertVehicle(Record);
        }

        Console.WriteLine($@" VIN : {VehicleVIN} Found Previously? {Existing} {VehicleYear} {VehicleMake} {VehicleModel} {VehicleTrim} ({VehicleBodyStyle}) Drivetrain: Engine: {VehicleEngine} | Transmission: {VehicleTrans} Drive type: {VehicleDriveType} | Wheel size: {WheelSizeInches} Color: Exterior: {VehicleExtColor} | Interior: {IntColor} Mileage: {Mileage} Price: {Price} Stock #: {StockNumber} | Model Code: {ModelCode} Direct URL: {VehicleLink} Carfax URL: {CarfaxURL}");
    }
}
/// <summary>
/// Walks every "Dealer.Com" dealership returned by the database, scrapes all result pages
/// of its inventory, and then marks the vehicles that were for sale on the previous run
/// but were not seen this time as no longer for sale.
/// </summary>
public static void ProcessAllDealers()
{
    HtmlWeb webGet = new HtmlWeb();
    List<DEALERSHIP> dealers = VehicleDBManager.GetDealerships("Dealer.Com");
    if (dealers == null)
    {
        return;
    }

    foreach (DEALERSHIP dealer in dealers)
    {
        string DealerName = dealer.DEALER_NAME;
        string MainUrl = dealer.DEALER_URL;
        Console.WriteLine($"************\r\n*** Processing Dealer: {DealerName}\r\n************");

        HtmlDocument mainpage = webGet.Load(MainUrl);
        int PageCount = 0;
        BaseURI = new Uri(MainUrl).GetLeftPart(UriPartial.Authority);
        Console.WriteLine($"Base URL for dealership: {BaseURI}");

        List<VEHICLE> PreviouslyFoundAtDealer = VehicleDBManager.GetAllUnsoldVehiclesForDealer(DealerName);

        // Follow the rel="next" link until there is none.
        while (mainpage != null)
        {
            Console.WriteLine($"\r\n\r\nRetrieving page #{++PageCount} of vehicles for dealer {DealerName} in the {dealer.MARKET_AREA_NAME} Market Area\r\n");

            // Note: if the items are "shared", then they are listings from affiliated dealerships.
            HtmlNodeCollection VehicleListItems = mainpage.DocumentNode.SelectNodes("//li[substring(@class, 1, 14)='item notshared']");
            ProcessVehicleListOnPage(VehicleListItems, DealerName, ref PreviouslyFoundAtDealer, dealer.MARKET_AREA_NAME);

            HtmlNode NextLink = mainpage.DocumentNode.SelectSingleNode("//a[@class='ddc-btn ddc-btn-link ddc-btn-xsmall'][@rel='next']");
            if (NextLink == null)
            {
                break;
            }
            mainpage = webGet.Load(MainUrl + NextLink.Attributes["href"].Value);
        }

        if (PreviouslyFoundAtDealer == null)
        {
            continue;
        }

        // Everything currently listed has been scraped. Whatever is left in the list of
        // vehicles from the previous run has disappeared from the site, so record it as
        // sold or auctioned.
        Console.WriteLine($"Found {PreviouslyFoundAtDealer.Count} vehicle(s) removed from dealer inventory since last run.");
        foreach (VEHICLE SoldOrAuctionedVehicle in PreviouslyFoundAtDealer)
        {
            SoldOrAuctionedVehicle.STILL_FOR_SALE = "NO";

            List<VehiclePriceHistory> PriceHistory = RetrieveHistory(SoldOrAuctionedVehicle.VEHICLE_HISTORY);
            string FinalPrice = "UNKNOWN";
            int DaysOnMarket = 0;

            // Min/Max throw on an empty sequence, so a vehicle without any recorded
            // price is still marked as sold but keeps its stored history untouched.
            if (PriceHistory != null && PriceHistory.Count > 0)
            {
                var MinDate = PriceHistory.Min(h => h.Date_Recorded);
                var MaxDate = PriceHistory.Max(h => h.Date_Recorded);
                DaysOnMarket = (MaxDate - MinDate).Days;

                // Several entries can share the latest timestamp; flag the last one
                // instead of failing on the duplicate.
                VehiclePriceHistory FinalPriceHistory = PriceHistory.Last(h => h.Date_Recorded == MaxDate);
                FinalPriceHistory.WasFinalPrice = "YES";
                FinalPrice = FinalPriceHistory.Price;
                SoldOrAuctionedVehicle.VEHICLE_HISTORY = SaveHistory(PriceHistory);
            }

            Console.WriteLine($@"Updating VIN {SoldOrAuctionedVehicle.VIN} : {SoldOrAuctionedVehicle.YEAR} {SoldOrAuctionedVehicle.MAKE} {SoldOrAuctionedVehicle.MODEL} (Stock number {SoldOrAuctionedVehicle.STOCK_NUMBER}) as sold. Final price was {FinalPrice}, Mileage was {SoldOrAuctionedVehicle.MILEAGE}, Days on market was {DaysOnMarket}");
            VehicleDBManager.UpdateVehicleRecord(SoldOrAuctionedVehicle);
        }
    }
}
/// <summary>
/// Exercises the positional and by-name indexers of <see cref="HtmlNodeCollection"/>:
/// both must resolve to the same node, and a slot can be overwritten by position.
/// </summary>
public void ItemByIndexTest()
{
    HtmlElement owner = new HtmlElement("root");
    HtmlNodeCollection nodes = new HtmlNodeCollection(owner);
    foreach (string tagName in new[] { "first", "second", "third" })
    {
        nodes.Add(new HtmlElement(tagName));
    }

    // Position 1 and the name "second" address the same entry.
    var byPosition = nodes[1];
    var byName = nodes["second"];
    Assert.AreEqual(byPosition, byName);

    HtmlElement replacement = new HtmlElement("another");
    nodes[2] = replacement;

    // NOTE(review): presumably assigning null is expected to throw (the expectation
    // would be declared outside this method) — confirm; if it does, the assertion
    // below never runs.
    nodes[0] = null;

    StringAssert.Contains(nodes[2].ToString(), "another");
}
/// <summary>
/// Loads the mobile version of a song page and extracts the lyrics, the stream link,
/// optionally the download size, the cover picture and the list of suggested songs.
/// </summary>
/// <param name="linkPage">URL of the song page; "http://www" is replaced by "http://m" before loading.</param>
/// <param name="linkStream">Receives the stream URL, or <c>null</c> when resolving it failed.</param>
/// <param name="lyric">Receives the HTML-decoded lyric text, or an empty string when the page has no lyric box.</param>
/// <param name="linkPicture">Receives <paramref name="albumCover"/> when it is supplied, otherwise an empty string.</param>
/// <param name="pictureCover">Receives a bitmap created from <paramref name="albumCover"/>, or an empty bitmap when no cover is supplied.</param>
/// <param name="quality">
/// <c>null</c> or a value containing "128" uses the link in the "download" box; any other
/// value follows the "_blank" link in the "pdlike" box.
/// </param>
/// <param name="size">Receives the size in megabytes followed by "MB" when <paramref name="isGetSize"/> is set and the size could be read; otherwise an empty string.</param>
/// <param name="albumCover">URL of the cover picture, or <c>null</c>.</param>
/// <param name="isGetSize">Whether to query the content length of the stream.</param>
/// <returns>The suggested songs found on the page, or <c>null</c> when the page lists none.</returns>
public static List<Song> GetMoreSongInfo(string linkPage, out string linkStream, out string lyric, out string linkPicture, out System.Windows.Media.Imaging.BitmapImage pictureCover, string quality, out string size, string albumCover, bool isGetSize = false)
{
    lyric = "";
    size = "";
    linkPicture = "";

    HtmlDocument wap = new HtmlDocument();
    wap.LoadHtml(SourceWeb.GetWebSource(linkPage.Replace("http://www", "http://m")));

    // Lyrics: optional, so a missing box simply leaves the text empty.
    HtmlNode lyricNode = wap.DocumentNode.SelectSingleNode("//div[@class='lyric']");
    if (lyricNode != null)
    {
        foreach (HtmlNode node in lyricNode.ChildNodes)
        {
            lyric += WebUtility.HtmlDecode(node.InnerText);
        }
    }

    // Stream link (best effort: any failure yields a null link).
    try
    {
        HtmlNode stream = wap.DocumentNode.SelectSingleNode("//div[@class='download']");
        if (quality == null || quality.Contains("128"))
        {
            linkStream = stream.SelectSingleNode(".//a").GetAttributeValue("href", "");
            if (isGetSize)
            {
                // A HEAD request is enough to read the content length.
                WebRequest request = WebRequest.Create(linkStream);
                request.Method = "HEAD";
                using (WebResponse reponse = request.GetResponse())
                {
                    size = Math.Round(reponse.ContentLength * 1.0 / 1024 / 1024, 2).ToString() + "MB";
                }
            }
        }
        else
        {
            HtmlNode pdlikeNode = wap.DocumentNode.SelectSingleNode("//div[@class='pdlike']");
            HtmlNode _blankNode = pdlikeNode.SelectSingleNode(".//a[@target='_blank']");
            WebRequest request = WebRequest.Create(_blankNode.GetAttributeValue("href", ""));
            using (WebResponse reponse = request.GetResponse())
            {
                // The final (redirected) address is the stream; a redirect to a login
                // page means it is not available, so fall back to the download-box link.
                linkStream = reponse.ResponseUri.ToString();
                if (linkStream.Contains("login"))
                {
                    linkStream = stream.SelectSingleNode(".//a").GetAttributeValue("href", "");
                }

                if (isGetSize)
                {
                    size = Math.Round(reponse.ContentLength * 1.0 / 1024 / 1024, 2).ToString() + "MB";
                }
            }
        }
    }
    catch
    {
        linkStream = null;
    }

    // Cover picture.
    if (albumCover == null)
    {
        // There is no default cover URI to load (constructing a Uri from null throws),
        // so hand back an empty bitmap.
        pictureCover = new System.Windows.Media.Imaging.BitmapImage();
    }
    else
    {
        pictureCover = new System.Windows.Media.Imaging.BitmapImage(new Uri(albumCover));
        linkPicture = albumCover;
    }

    // Suggested songs.
    HtmlNodeCollection nodeSuggestSongs = wap.DocumentNode.SelectNodes("//div[@id='relatedSong']/div[@class='row bgmusic ']");
    if (nodeSuggestSongs == null)
    {
        return null;
    }

    List<Song> listSuggestSong = new List<Song>();
    foreach (HtmlNode nodeSong in nodeSuggestSongs)
    {
        HtmlNode nodeArtist = nodeSong.SelectSingleNode(".//p");
        HtmlNode nodeTitle = nodeSong.SelectSingleNode(".//h3/a");
        string href = nodeTitle == null ? null : nodeTitle.GetAttributeValue("href", null);

        // Skip rows that lack the artist text, the title link or its address.
        if (nodeArtist == null || nodeArtist.ChildNodes.Count < 2 || href == null)
        {
            continue;
        }

        Song tempSong = new Song();
        tempSong.Name_Artist = nodeArtist.ChildNodes[1].InnerText;
        tempSong.Name_Song = nodeTitle.InnerText;
        tempSong.URL = href.Replace("http://m", "http://www");
        tempSong.Name_Album = "Nhac Cua Tui";
        listSuggestSong.Add(tempSong);
    }

    return listSuggestSong;
}
/// <summary>
/// Runs a keyword search and loads every product found on the result page.
/// </summary>
/// <param name="listOfProducts">Receives the loaded products; empty when the result list has no items.</param>
/// <param name="settings">Search settings; only <c>KeyWords</c> is read here.</param>
/// <param name="token">Cancellation token, observed before every navigation attempt and before every product.</param>
public override void FindItems(out List<Product> listOfProducts, SearchSettingsBase settings, CancellationToken token)
{
    listOfProducts = new List<Product>();
    string searchUrl = UrlPrefix + string.Format(Keywords, settings.KeyWords) + PageSizeSuffix;

    // Gender / colour filtering is switched off: both ids are fixed at 0, so the block
    // below does not run. Gender ids: 0 - all, 1 - mens, 2 - womens, 3 - boys, 4 - girls.
    int genderId = 0;
    int colorId = 0;
    if (genderId != 0 || colorId != 0)
    {
        Color color = null;
        Gender gender = null;
        string genderString = "";
        string crumbs;
        string cmREF;
        string colorGenderId;

        if (genderId != 0)
        {
            gender = genders[genderId];
            genderString = "/" + gender.name;
        }
        if (colorId != 0)
        {
            color = colors[colorId];
        }

        if (color != null && gender != null)
        {
            colorGenderId = gender.id + "Z" + color.id;
            crumbs = gender.crumbs + " " + color.crumbs;
            cmREF = color.cmRef;
        }
        else if (color != null)
        {
            colorGenderId = color.id;
            crumbs = color.crumbs;
            cmREF = color.cmRef;
        }
        else
        {
            colorGenderId = gender.id;
            crumbs = gender.crumbs;
            cmREF = gender.cmRef;
        }

        searchUrl = WebsiteBaseUrl + string.Format(GenderColorPrefix, genderString, colorGenderId) + string.Format(Keywords, settings.KeyWords) + PageSizeSuffix + string.Format(GenderPostfix, crumbs, cmREF);
    }

    // Navigate until the product list shows up. The token is checked on every attempt so
    // a page that never contains the list cannot keep this loop running after cancellation.
    HtmlNode container = null;
    while (container == null)
    {
        token.ThrowIfCancellationRequested();
        HtmlNode node = InitialNavigation(searchUrl, token);
        container = node.SelectSingleNode(UlXpath);
    }

    // SelectNodes returns null for a list without items.
    HtmlNodeCollection children = container.SelectNodes("./li");
    if (children == null)
    {
        return;
    }

    foreach (HtmlNode child in children)
    {
        token.ThrowIfCancellationRequested();
#if DEBUG
        LoadSingleProduct(listOfProducts, child);
#else
        LoadSingleProductTryCatchWraper(listOfProducts, child);
#endif
    }
}
/// <summary>
/// Parses a listing page and builds one <c>Img</c> per thumbnail found on it.
/// </summary>
/// <param name="pageString">HTML of the listing page.</param>
/// <param name="proxy">Not used by this method.</param>
/// <returns>
/// The images that could be parsed; empty when the page does not contain the expected
/// list. Entries that fail to parse are skipped.
/// </returns>
public override List<Img> GetImages(string pageString, System.Net.IWebProxy proxy)
{
    List<Img> imgs = new List<Img>();

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageString);

    // Pick the item nodes for the current source type. The list element may be missing
    // from the page, in which case there is nothing to return.
    HtmlNodeCollection nodes;
    if (srcType == PixivSrcType.Tag)
    {
        HtmlNode list = doc.DocumentNode.SelectSingleNode("//ul[@class='_image-items autopagerize_page_element']");
        nodes = list == null ? null : list.SelectNodes("li");
    }
    else if (srcType == PixivSrcType.Author)
    {
        HtmlNode list = doc.DocumentNode.SelectSingleNode("//ul[@class='_image-items']");
        nodes = list == null ? null : list.SelectNodes("li");
    }
    else
    {
        // ranking
        nodes = doc.DocumentNode.SelectNodes("//section[@class='ranking-item']");
    }

    if (nodes == null)
    {
        return imgs;
    }

    bool isRanking = srcType == PixivSrcType.Day || srcType == PixivSrcType.Month || srcType == PixivSrcType.Week;
    bool isTagOrAuthor = srcType == PixivSrcType.Tag || srcType == PixivSrcType.Author;

    foreach (HtmlNode imgNode in nodes)
    {
        try
        {
            HtmlNode anode = imgNode.SelectSingleNode("a");
            if (isRanking)
            {
                anode = imgNode.SelectSingleNode(".//div[@class='ranking-image-item']").SelectSingleNode("a");
            }

            // Details are extracted from the link target, e.g.
            // member_illust.php?mode=medium&illust_id=29561307&ref=rn-b-5-thumbnail
            string detailUrl = anode.Attributes["href"].Value.Replace("amp;", "");

            // Tag and author pages carry the thumbnail in "src", the other pages in "data-src".
            string previewAttribute = isTagOrAuthor ? "src" : "data-src";
            string previewUrl = anode.SelectSingleNode(".//img").Attributes[previewAttribute].Value;

            // Drop the query string from the preview address.
            int queryStart = previewUrl.IndexOf('?');
            if (queryStart >= 0)
            {
                previewUrl = previewUrl.Substring(0, queryStart);
            }

            // Extract the id from the detail url.
            string id = System.Text.RegularExpressions.Regex.Match(detailUrl, @"illust_id=\d+").Value;
            id = id.Substring(id.IndexOf('=') + 1);

            Img img = GenerateImg(detailUrl, previewUrl, id);
            if (img != null)
            {
                imgs.Add(img);
            }
        }
        catch
        {
            // Best effort: an entry with unexpected markup is skipped so the rest of the
            // page can still be used.
        }
    }

    return imgs;
}
/// <summary>
/// Appends every node from <paramref name="index"/> to the end of <paramref name="nodes"/>
/// to <paramref name="newParent"/>, removes those nodes from <paramref name="nodes"/>, and
/// flags <paramref name="newParent"/> as explicitly terminated.
/// </summary>
private static void MoveNodesDown(ref HtmlNodeCollection nodes, int index, HtmlElement newParent)
{
    int originalCount = nodes.Count;

    // First pass: hand each trailing node over to the new parent.
    int position = index;
    while (position < originalCount)
    {
        newParent.Nodes.Add(nodes[position]);
        nodes[position].SetParent(newParent);
        position++;
    }

    // Second pass: drop the moved nodes. Removing at the same position each time works
    // because the remaining nodes shift down after every removal.
    int remaining = originalCount - index;
    while (remaining > 0)
    {
        nodes.RemoveAt(index);
        remaining--;
    }

    newParent.IsExplicitlyTerminated = true;
}
/// <summary>
/// Returns the HTML-decoded <c>content</c> attribute of the first node whose
/// <c>property</c> attribute equals <paramref name="prop"/>.
/// </summary>
/// <param name="nodes">Candidate nodes; may be <c>null</c> (the result of a query that matched nothing).</param>
/// <param name="prop">Value the <c>property</c> attribute must have.</param>
/// <returns>
/// The decoded content, or <c>null</c> when <paramref name="nodes"/> is <c>null</c>, no node
/// matches, or the matching node has no <c>content</c> attribute.
/// </returns>
private static string GetMetaProperty(HtmlNodeCollection nodes, string prop) =>
    HttpUtility.HtmlDecode(
        nodes?.FirstOrDefault(x => x.Attributes["property"]?.Value == prop)?.Attributes["content"]?.Value);
/// <summary>
/// Builds a <see cref="UserInfo"/> from a user's m.facebook.com "about" page: resolves the
/// user id when an alias was passed, and optionally reads the display name, the avatar URL
/// and the friend list.
/// </summary>
/// <param name="userIdOrAlias">
/// User id or profile alias. It is treated as an id when the "NonDigit" compiled pattern does
/// not match it. Must not be null, empty or whitespace.
/// </param>
/// <param name="includeUserAbout">When true, the display name and avatar URL are read from the avatar image.</param>
/// <param name="includedFriendList">When true, the friend list is loaded through GetFriends.</param>
/// <returns>
/// The populated user info, or <c>null</c> when the page could not be loaded or no user id
/// could be detected.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="userIdOrAlias"/> is null, empty or whitespace.</exception>
public UserInfo GetUserInfo(string userIdOrAlias, bool includeUserAbout = false, bool includedFriendList = false)
{
    if (string.IsNullOrWhiteSpace(userIdOrAlias))
    {
        throw new ArgumentException("userIdOrAlias must not null or empty.");
    }

    UserInfo userInfo = new UserInfo();

    // TODO : the id-or-alias check is repeated on later calls even when the caller already
    // knows which one it holds.
    string userAboutUrl;
    bool isUserId = !CompiledRegex.Match("NonDigit", userIdOrAlias).Success;
    if (isUserId)
    {
        userAboutUrl = "https://m.facebook.com/profile.php?v=info&id=" + userIdOrAlias;
        userInfo.Id = userIdOrAlias;
        userInfo.Alias = string.Empty;
    }
    else
    {
        userAboutUrl = "https://m.facebook.com/" + userIdOrAlias + "/about";
        userInfo.Id = string.Empty;
        userInfo.Alias = userIdOrAlias;
    }

    HtmlNode htmlDom = this.BuildDom(userAboutUrl);
    if (htmlDom == null)
    {
        _logger.WriteLine("Could not detect id from " + userAboutUrl);
        return null;
    }

    // The avatar anchor carries the avatar image, the display name and possibly the id.
    // For the current user, or a user without a regular avatar, the first path lands on
    // an "edit/add profile picture" link, so a deeper anchor is used instead.
    HtmlNode avatarAnchor = htmlDom.SelectSingleNode("//div[@id='root']/div/div/div/div/div/a");
    if (avatarAnchor == null ||
        avatarAnchor.InnerText == ConstString.EditProfilePicture ||
        avatarAnchor.InnerText == ConstString.AddProfilePicture)
    {
        avatarAnchor = htmlDom.SelectSingleNode("//div[@id='root']/div/div/div/div/div/div/a");
    }
    // NOTE(review): "/a/img" is an absolute path, so it is evaluated from the document root
    // rather than from the previous sibling — confirm whether "a/img" was intended.
    else if (avatarAnchor.SelectSingleNode("div/a/img") != null ||
             (avatarAnchor.PreviousSibling != null && avatarAnchor.PreviousSibling.SelectSingleNode("/a/img") != null))
    {
        // Use the first nested anchor that directly contains an image.
        HtmlNodeCollection nestedAnchors = avatarAnchor.SelectNodes("div/a");
        if (nestedAnchors != null)
        {
            foreach (HtmlNode anchor in nestedAnchors)
            {
                if (anchor.SelectSingleNode("img") != null)
                {
                    avatarAnchor = anchor;
                    break;
                }
            }
        }
    }

    if (!isUserId)
    {
        // Strategy 1: the avatar link. Three address formats are recognised, e.g.
        //   /photo.php?fbid=704517456378829&id=100004617430839&...
        //   /profile/picture/view/?profile_id=100003877944061&...
        //   /story.php?story_fbid=...&id=...   (animated avatar)
        string avatarHref = avatarAnchor == null ? null : avatarAnchor.GetAttributeValue("href", null);
        if (avatarHref != null)
        {
            foreach (string patternKey in new[] { "UserIdFromAvatar1", "UserIdFromAvatar2", "UserIdFromAvatar3" })
            {
                Match idMatch = CompiledRegex.Match(patternKey, avatarHref);
                if (idMatch.Success)
                {
                    userInfo.Id = idMatch.Groups["id"].Value;
                    break;
                }
            }
        }

        // Strategy 2: the navigation links (Timeline, Friends, Photos, ...). These do not
        // depend on the avatar, so they are tried even when no avatar anchor was found.
        // Important : /div/div/div/a must be selected before /div/div/a. Do not swap the order.
        if (string.IsNullOrEmpty(userInfo.Id))
        {
            HtmlNodeCollection navigationAnchors = htmlDom.SelectNodes("//div[@id='root']/div/div/div/a")
                                                   ?? htmlDom.SelectNodes("//div[@id='root']/div/div/a");
            userInfo.Id = FindUserIdInAnchors(navigationAnchors);
        }

        // Strategy 3: the action buttons (Add Friend, Message, Follow, More).
        // Only the first xpath is used; a second candidate is left for the future.
        if (string.IsNullOrEmpty(userInfo.Id))
        {
            HtmlNodeCollection actionButtons = htmlDom.SelectNodes("//div[@id='root']/div/div/div/table/tr/td/a");
            userInfo.Id = FindUserIdInAnchors(actionButtons);
        }
    }

    if (string.IsNullOrEmpty(userInfo.Id))
    {
        _logger.WriteLine("Could not detect id from " + userAboutUrl);
        return null;
    }

    // The id is known from here on.
    if (includeUserAbout && avatarAnchor != null)
    {
        HtmlNode avatar = avatarAnchor.SelectSingleNode("img");
        if (avatar != null)
        {
            userInfo.DispayName = WebUtility.HtmlDecode(avatar.GetAttributeValue("alt", string.Empty));
            userInfo.AvatarUrl = WebUtility.HtmlDecode(avatar.GetAttributeValue("src", string.Empty));
        }
        else
        {
            _logger.WriteLine("Img tag in avatar is null. Addition info : " + userAboutUrl);
        }
    }

    if (includedFriendList)
    {
        var firstFriendPageUrl = "https://m.facebook.com/profile.php?v=friends&startindex=0&id=" + userInfo.Id;
        userInfo.Friends = GetFriends(firstFriendPageUrl);
    }

    return userInfo;
}

/// <summary>
/// Returns the "id" group of the first anchor whose href matches the compiled pattern keyed
/// by the anchor's own inner text, or an empty string when no anchor matches.
/// </summary>
/// <remarks>
/// The pattern keys are link captions, so they depend on the language the page is served in.
/// </remarks>
private string FindUserIdInAnchors(HtmlNodeCollection anchors)
{
    if (anchors == null)
    {
        return string.Empty;
    }

    foreach (HtmlNode anchor in anchors)
    {
        string hrefAttr = anchor.GetAttributeValue("href", string.Empty);
        string innerText = anchor.InnerText;
        if (string.IsNullOrWhiteSpace(innerText))
        {
            continue;
        }

        Match idMatch = CompiledRegex.Match(innerText, hrefAttr);
        if (idMatch.Success)
        {
            return idMatch.Groups["id"].Value;
        }
    }

    return string.Empty;
}
/// <summary>
/// Crawls the wedding business directory: for every category in the mapping file it walks
/// the listing pages, opens each company page that has not been stored yet, extracts the
/// contact details and stores them through the <c>[dbo].insertgrabpost</c> procedure.
/// Any exception ends the run and is appended to the log file.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    string url = "http://businessdirectory.esdlife.com/wedding/CH-TC/Wedding2CompInfo.php?pages=1&catID={0}";
    string detailurl = "http://businessdirectory.esdlife.com/wedding/CH-TC/{0}";
    string sqlresult = "";
    try
    {
        string urlfilepath = rootDir + mappingfile;
        HtmlWeb web = new HtmlWeb();

        foreach (string cats in File.ReadLines(urlfilepath).ToList())
        {
            if (string.IsNullOrEmpty(cats))
            {
                continue;
            }

            // Tab-separated mapping line: the last column is the source category id, the
            // second column the target category id (falls back to the source id).
            string[] catgroup = cats.Split('\t');
            string cat = catgroup[catgroup.Length - 1];
            string cat_88 = catgroup.Length > 1 ? catgroup[1] : cat;
            if (string.IsNullOrEmpty(cat_88))
            {
                cat_88 = cat;
            }

            string url_result = string.Format(url, cat);
            int page = getPageParameter(url_result);
            for (int pageindex = page; pageindex < 30; pageindex++)
            {
                string pathurl = SetPageParameter(url_result, pageindex);
                HtmlAgilityPack.HtmlDocument maindoc = web.Load(pathurl);
                HtmlNodeCollection nodes = maindoc.DocumentNode.SelectNodes("//div[@class='cn']/h3/a[@class='orangetxt']");
                if (nodes == null)
                {
                    // No listings on this page: the category has no further pages.
                    break;
                }

                for (int i = 0; i < nodes.Count; i++)
                {
                    string href = nodes[i].Attributes["href"].Value;
                    string urlpath = string.Format(detailurl, href);
                    Console.WriteLine("urlpath: " + urlpath);

                    // The id comes from scraped markup and is placed into SQL text below,
                    // so only plain decimal ids are accepted.
                    string detailId = href.Replace("Wedding2ShowCompInfo.php?id=", "");
                    if (detailId.Length == 0 || !detailId.All(c => c >= '0' && c <= '9'))
                    {
                        continue;
                    }

                    DataTable dt = SQLHelper.ExecuteDt("SELECT [temppostid] FROM [grabpost] with(nolock) where sourceRefId=" + detailId + " and sourceSite='" + sourceSite + "'");
                    if (dt.Rows.Count > 0)
                    {
                        // Already stored.
                        continue;
                    }

                    HtmlAgilityPack.HtmlDocument doc = web.Load(urlpath);

                    HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//h1[@class='d_title']");
                    if (titleNode == null)
                    {
                        continue;
                    }

                    string membermobile = "";
                    string memberaddress = "";
                    string memberemail = "";
                    string memberwebsite = "";
                    string memberavator = "";
                    string district = "";
                    string keyword = "";
                    string postcontent = "";
                    string summary = "";
                    float flat = 0;
                    float flng = 0;
                    string postcontactno1 = "";
                    string postcontactno2 = "";
                    string postaddress = "";
                    string contactemail = "";

                    HtmlNode mainNode = doc.DocumentNode.SelectSingleNode("//div[@class='text']");
                    HtmlNode keywrodNode = doc.DocumentNode.SelectSingleNode("//div[@class='ser']/a");
                    HtmlNode contentNode = doc.DocumentNode.SelectSingleNode("//div[@class='d_content']");

                    // The coordinates are read from the node two siblings before the
                    // wedding_content box; any of these nodes may be missing.
                    HtmlNode weddingContent = doc.DocumentNode.SelectSingleNode("//div[@id='wedding_content']");
                    HtmlNode lat_lng = null;
                    if (weddingContent != null && weddingContent.PreviousSibling != null)
                    {
                        lat_lng = weddingContent.PreviousSibling.PreviousSibling;
                    }

                    string title = titleNode.InnerHtml.Replace("\n", "").Trim();
                    string membername = title;

                    if (mainNode != null)
                    {
                        foreach (HtmlNode node in mainNode.ChildNodes)
                        {
                            string text = node.InnerText;

                            // Address
                            if (text.IndexOf("地址:") != -1)
                            {
                                postaddress = text.Replace("\n", "").Replace("\t", "").Replace(" ", "");
                                memberaddress = postaddress;
                            }

                            // Telephone: one number, or two separated by '/'
                            if (text.IndexOf("電話:") != -1)
                            {
                                string[] tels = text.Replace("電話:", "").Replace("\n", "").Replace("\t", "").Replace(" ", "").Split('/');
                                postcontactno1 = tels[0];
                                postcontactno2 = tels.Length > 1 ? tels[1] : tels[0];
                            }

                            // E-mail: the value lives in the node after the label
                            if (text.IndexOf("電郵:") != -1 && node.NextSibling != null)
                            {
                                string[] emails = node.NextSibling.InnerText.Split('/');
                                memberemail = emails[0].Trim();
                                if (emails.Length > 1)
                                {
                                    contactemail = emails[1].Trim();
                                }
                            }

                            // Website: the value lives in the node after the label
                            if (text.IndexOf("網址:") != -1 && node.NextSibling != null)
                            {
                                memberwebsite = node.NextSibling.InnerText;
                            }

                            // Remarks
                            if (text.IndexOf("備註:") != -1)
                            {
                                summary = text.Replace("備註:", "").Trim();
                            }
                        }
                    }

                    if (keywrodNode != null)
                    {
                        // Normalise the keyword separators to commas.
                        keyword = keywrodNode.InnerHtml.Replace("\n", "").Trim();
                        string[] separators = { ",", "/", "|", ">", "." };
                        keyword = string.Join(",", keyword.Replace("\n", "").Replace("\t", "").Replace(" ", "").Split(separators, StringSplitOptions.RemoveEmptyEntries));
                    }

                    if (lat_lng != null)
                    {
                        // The coordinates are script variables: var MerchantLatitude = '...';
                        string[] assignment = { "=" };
                        foreach (string str in lat_lng.InnerText.Split('\n'))
                        {
                            string row = str.Trim().Replace(";", "");
                            if (row.IndexOf("var MerchantLatitude") != -1)
                            {
                                string[] lats = row.Split(assignment, StringSplitOptions.RemoveEmptyEntries);
                                if (lats.Length >= 2)
                                {
                                    // Script literals use '.', so parse culture-independently.
                                    float.TryParse(lats[1].Replace("'", ""), System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands, System.Globalization.CultureInfo.InvariantCulture, out flat);
                                    continue;
                                }
                            }
                            if (row.IndexOf("var MerchantLongitude") != -1)
                            {
                                string[] lngs = row.Split(assignment, StringSplitOptions.RemoveEmptyEntries);
                                if (lngs.Length >= 2)
                                {
                                    float.TryParse(lngs[1].Replace("'", ""), System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands, System.Globalization.CultureInfo.InvariantCulture, out flng);
                                    break;
                                }
                            }
                        }
                    }

                    if (contentNode != null)
                    {
                        // Drop the "introduction" label from the description text.
                        postcontent = contentNode.InnerText.Replace("簡介:", "").Trim();
                    }

                    // Parameters of [dbo].insertgrabpost. District, avatar, mobile and
                    // member contact number are not scraped and are sent empty.
                    SQLHelper sqlHelper = new SQLHelper();
                    SqlParameter[] parameters =
                    {
                        new SqlParameter("@categoryid", SqlDbType.BigInt) { Value = cat_88 },
                        new SqlParameter("@title", SqlDbType.NVarChar, 512) { Value = title },
                        new SqlParameter("@summary", SqlDbType.NVarChar, 512) { Value = summary },
                        new SqlParameter("@postcontent", SqlDbType.NVarChar, -1) { Value = postcontent },
                        new SqlParameter("@keyword", SqlDbType.NVarChar, 512) { Value = keyword },
                        new SqlParameter("@district", SqlDbType.NVarChar, 64) { Value = district },
                        new SqlParameter("@membername", SqlDbType.NVarChar, 256) { Value = membername },
                        new SqlParameter("@memberemail", SqlDbType.VarChar, 100) { Value = memberemail },
                        new SqlParameter("@memberavator", SqlDbType.VarChar, 512) { Value = memberavator },
                        new SqlParameter("@membermobile", SqlDbType.VarChar, 30) { Value = membermobile },
                        new SqlParameter("@membercontactno", SqlDbType.VarChar, 30) { Value = "" },
                        new SqlParameter("@memberaddress", SqlDbType.NVarChar, 512) { Value = memberaddress },
                        new SqlParameter("@memberwebsite", SqlDbType.NVarChar, 512) { Value = memberwebsite },
                        new SqlParameter("@postcontactno1", SqlDbType.NVarChar, 30) { Value = postcontactno1 },
                        new SqlParameter("@postcontactno2", SqlDbType.NVarChar, 30) { Value = postcontactno2 },
                        new SqlParameter("@postaddress", SqlDbType.NVarChar, 512) { Value = postaddress },
                        new SqlParameter("@postlatitude", SqlDbType.Float) { Value = flat },
                        new SqlParameter("@postlongitude", SqlDbType.Float) { Value = flng },
                        new SqlParameter("@contactemail", SqlDbType.NVarChar, 100) { Value = contactemail },
                        new SqlParameter("@memberdesc", SqlDbType.NVarChar, 1000) { Value = summary },
                        new SqlParameter("@sourceSite", SqlDbType.NVarChar, 100) { Value = sourceSite },
                        new SqlParameter("@sourceRefID", SqlDbType.NVarChar, 100) { Value = detailId }
                    };

                    // Readable copy of the call, kept only so it can be logged on failure.
                    StringBuilder paramstr = new StringBuilder();
                    paramstr.Append(" exec [dbo].insertgrabpost{");
                    foreach (SqlParameter p in parameters)
                    {
                        paramstr.Append(p.ParameterName + "=N'" + p.Value + "',").AppendLine();
                    }
                    paramstr.Append("}");
                    sqlresult = paramstr.ToString();

                    sqlHelper.RunProc("[dbo].insertgrabpost", parameters);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Log type, message and stack trace, followed by the last procedure call built.
        File.AppendAllText(rootDir + logfile, ex + Environment.NewLine + sqlresult + Environment.NewLine);
    }
}
/// <summary>
/// Extracts label/value pairs from an HTML fragment and returns them as a flat JSON object.
/// </summary>
/// <remarks>
/// Two layouts are handled:
/// documents that carry a <c>meta</c> tag named <c>Generator</c> (with a <c>content</c> attribute) are read
/// line by line from the body text, and every line holding exactly one <c>:</c> becomes a pair;
/// every other document is read from its <c>td</c> cells, where a cell that looks like a label is
/// paired with the cell that follows it.
/// Pairs are collected in a dictionary and serialized by Json.NET, so quotes or backslashes inside a
/// name or value can no longer produce malformed JSON, and a label that is never followed by a value
/// cell is skipped instead of breaking the whole document. When a label occurs twice the last value wins.
/// </remarks>
/// <param name="input">HTML markup to scan. May be null or empty.</param>
/// <returns>The serialized JSON object, or <c>null</c> when no pair was found.</returns>
private static string ParseHtmlBrsBodytoJson(string input)
{
    if (String.IsNullOrEmpty(input))
    {
        return null;
    }

    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(input);

    var pairs = new System.Collections.Generic.Dictionary<string, string>();
    if (HasGeneratorMetaTag(document))
    {
        CollectBodyLinePairs(document, pairs);
    }
    else
    {
        CollectTableCellPairs(document, pairs);
    }

    // An empty result is reported as null, exactly like the former "{}" check.
    return pairs.Count == 0 ? null : JsonConvert.SerializeObject(pairs);
}

// Returns true when the document has a <meta name="Generator" content="..."> tag.
private static bool HasGeneratorMetaTag(HtmlAgilityPack.HtmlDocument document)
{
    var metaTags = document.DocumentNode.SelectNodes("//meta");
    if (metaTags == null)
    {
        return false;
    }

    foreach (var tag in metaTags)
    {
        if (tag.Attributes["name"] != null
            && tag.Attributes["content"] != null
            && tag.Attributes["name"].Value == "Generator")
        {
            return true;
        }
    }

    return false;
}

// Reads "name:value" lines from the text of the first <body> element into pairs.
private static void CollectBodyLinePairs(HtmlAgilityPack.HtmlDocument document, System.Collections.Generic.IDictionary<string, string> pairs)
{
    HtmlNodeCollection bodies = document.DocumentNode.SelectNodes("//body");
    if (bodies == null || bodies.Count == 0)
    {
        return;
    }

    foreach (string line in bodies[0].InnerText.Split('\r'))
    {
        // Only lines with exactly one ':' are treated as a pair.
        string[] parts = line.Split(':');
        if (parts.Length != 2)
        {
            continue;
        }

        // NOTE(review): the blank is stripped twice in these chains; one of the two literals may
        // originally have been a non-breaking space - confirm against the file's encoding.
        string property = parts[0].Replace(" ", string.Empty).Replace(":", string.Empty).Replace(" ", string.Empty).Replace("?", string.Empty).Replace("/", string.Empty).Replace("(", string.Empty).Replace(")", string.Empty).Replace("-", string.Empty).Replace("\r\n", string.Empty).Trim();
        string value = parts[1].Replace(" ", string.Empty).Replace(":", string.Empty).Replace(" ", string.Empty).Replace("\r\n", string.Empty).Trim();
        pairs[property] = value;
    }
}

// Walks all <td> cells; a label-like cell names the property, the next non-label cell supplies its value.
private static void CollectTableCellPairs(HtmlAgilityPack.HtmlDocument document, System.Collections.Generic.IDictionary<string, string> pairs)
{
    HtmlNodeCollection tds = document.DocumentNode.SelectNodes("//td");
    if (tds == null || tds.Count == 0)
    {
        return;
    }

    string pendingProperty = null;
    foreach (HtmlNode td in tds)
    {
        string tdtext = td.InnerText;

        //TODO: IDENTIFY PROPERTY VS VALUE BETTER
        bool looksLikeProperty =
            (tdtext.Contains(":") && !tdtext.Contains("Last Modified") && !tdtext.Contains(":/"))
            || (tdtext.Contains("-") && !tdtext.Any(c => char.IsDigit(c)))
            || (tdtext.Contains("=") && !tdtext.Contains("?"));

        if (looksLikeProperty)
        {
            // A label directly after another label replaces it: the earlier one never got a value.
            pendingProperty = tdtext.Replace(" ", string.Empty).Replace(":", string.Empty).Replace(" ", string.Empty).Replace("\r\n", string.Empty).Replace("(", string.Empty).Replace(")", string.Empty).Replace("?", string.Empty).Replace(".", string.Empty).Trim();
        }
        else if (pendingProperty != null)
        {
            pairs[pendingProperty] = tdtext.Replace(" ", string.Empty).Replace("\r\n", string.Empty).Replace("'", string.Empty).Trim();
            pendingProperty = null;
        }
    }
}
/// <summary>
/// Looks a movie up on OMDb, finds its English subtitle on yts-subs.com and starts downloading
/// the subtitle archive into the temp directory.
/// </summary>
/// <remarks>
/// The download is started with <c>DownloadFileAsync</c> and is not awaited here; completion is
/// reported through <c>Wc_DownloadFileCompleted</c>. The target path is stored in <c>tempfilepath</c>.
/// </remarks>
/// <param name="movie">Movie whose <c>Name</c> and <c>Year</c> are used for the OMDb query.</param>
/// <returns>
/// The download link of the subtitle archive, or <see cref="string.Empty"/> when the movie, an
/// English subtitle row or the download link could not be found.
/// </returns>
public static async Task <string> GetSubtitle(Movie movie)
{
    // One client serves all three requests and is disposed on every exit path.
    using (var client = new HttpClient())
    {
        //get movie info from open-movie-db-api
        // The title goes into the "t" parameter and is escaped so spaces, '&' etc. survive the query string.
        string title = Uri.EscapeDataString($"{movie.Name}");
        HttpResponseMessage response = await client.GetAsync($"http://www.omdbapi.com/?apikey={omdbapikey}&t={title}&y={movie.Year}&r=xml");
        string result = await response.Content.ReadAsStringAsync();

        XmlDocument doc = new XmlDocument();
        doc.LoadXml(result);

        string id = string.Empty;
        foreach (XmlNode node in doc.DocumentElement)
        {
            if (node.Attributes == null)
            {
                continue;
            }

            // Prefer the attribute by name; fall back to the positional lookup this method used before.
            XmlAttribute imdbId = node.Attributes["imdbID"]
                                  ?? (node.Attributes.Count > 17 ? node.Attributes[17] : null);
            if (imdbId != null)
            {
                id = imdbId.Value;
            }
        }

        if (string.IsNullOrWhiteSpace(id))
        {
            // e.g. an error response without a movie element
            return(string.Empty);
        }

        //get subtitle list page
        response = await client.GetAsync($"https://yts-subs.com/movie-imdb/{id}");
        result = await response.Content.ReadAsStringAsync();

        //filter data and find English subtitle
        var sublistpage = new HtmlDocument();
        sublistpage.LoadHtml(result);
        HtmlNodeCollection nodes = sublistpage.DocumentNode.SelectNodes("//table[@class='table other-subs']/tbody/tr");
        if (nodes == null)
        {
            // SelectNodes yields null, not an empty collection, when nothing matches
            return(string.Empty);
        }

        string link = string.Empty;
        foreach (var node in nodes)
        {
            var cell = node.SelectNodes("./td");
            if (cell == null || cell.Count < 5)
            {
                continue;
            }

            if (cell[1].InnerText.Trim().Equals("English"))
            {
                //get download page link (the last English row wins, as before)
                HtmlNode anchor = cell[4].SelectSingleNode("./a");
                if (anchor != null && anchor.Attributes["href"] != null)
                {
                    link = anchor.Attributes["href"].Value;
                }
            }
        }

        if (string.IsNullOrWhiteSpace(link))
        {
            return(string.Empty);
        }

        //get download page
        response = await client.GetAsync($"https://yts-subs.com{link}");
        result = await response.Content.ReadAsStringAsync();

        var downloadpage = new HtmlDocument();
        downloadpage.LoadHtml(result);
        var divs = downloadpage.DocumentNode.SelectNodes("//div[@class='col-xs-12']");
        if (divs == null || divs.Count < 2)
        {
            return(string.Empty);
        }

        HtmlNode downloadAnchor = divs[1].SelectSingleNode("./a");
        if (downloadAnchor == null || downloadAnchor.Attributes["href"] == null)
        {
            return(string.Empty);
        }

        var downloadlink = downloadAnchor.Attributes["href"].Value;
        if (string.IsNullOrWhiteSpace(downloadlink))
        {
            return(string.Empty);
        }

        Uri uri = new Uri(downloadlink);
        tempfilepath = Path.Combine(Path.GetTempPath(), Path.GetFileName(uri.LocalPath));

        // NOTE(review): the WebClient is disposed right after the asynchronous download is started;
        // confirm the completed handler still fires reliably, or await DownloadFileTaskAsync instead.
        using (var wc = new WebClient())
        {
            wc.DownloadFileCompleted += Wc_DownloadFileCompleted;
            wc.DownloadFileAsync(uri, tempfilepath);
        }

        return(downloadlink);
    }
}
/// <summary>
/// Builds the image list for one result page.
/// </summary>
/// <remarks>
/// In API mode the page is first handed to <c>booru.GetImages</c>; its result is returned when it is
/// not empty. Otherwise the page is parsed as HTML and one <c>Img</c> is created per
/// <c>div.thumbnail-preview</c> element. Each item gets a <c>DownloadDetail</c> callback that loads
/// the detail page and fills in size, sample URL, date, author, source, rating, score and original URL.
/// Thumbnails without the expected <c>span/a/img</c> structure are skipped.
/// </remarks>
/// <param name="pageString">Page content (API response or HTML).</param>
/// <param name="proxy">Proxy passed on to <c>booru.GetImages</c>.</param>
/// <returns>The images found; may be empty, never null on the HTML path.</returns>
public override List <Img> GetImages(string pageString, IWebProxy proxy)
{
    List <Img> list = new List <Img>();

    //API
    if (APImode)
    {
        list = booru.GetImages(pageString, proxy);
        if (list.Count > 0)
        {
            return(list);
        }
    }

    //Html
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(pageString);
    HtmlNodeCollection previewNodes = document.DocumentNode.SelectNodes("//div[@class=\"thumbnail-preview\"]");
    if (previewNodes == null)
    {
        return(list);
    }

    foreach (HtmlNode node in previewNodes)
    {
        HtmlNode linkNode = node.SelectSingleNode("./span/a");
        HtmlNode imgNode = linkNode == null ? null : linkNode.SelectSingleNode("./img");
        if (linkNode == null || imgNode == null)
        {
            // Unexpected thumbnail markup: skip this entry instead of failing the whole page.
            continue;
        }

        string detailUrl = FormattedImgUrl(linkNode.Attributes["href"].Value);
        string desc = Regex.Match(linkNode.InnerHtml, "(?<=title=\" ).*?(?= score)").Value;

        Img item = new Img()
        {
            Desc       = desc,
            Tags       = desc,
            Id         = Convert.ToInt32(Regex.Match(linkNode.Attributes["id"].Value, @"\d+").Value),
            DetailUrl  = detailUrl,
            PreviewUrl = imgNode.Attributes["data-original"].Value
        };

        item.DownloadDetail = (i, p) =>
        {
            string html = new MyWebClient { Proxy = p, Encoding = Encoding.UTF8 }.DownloadString(i.DetailUrl);
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            HtmlNode imgData = doc.DocumentNode.SelectSingleNode("//*[@id=\"image\"]");
            if (imgData != null)
            {
                i.Width     = Convert.ToInt32(imgData.Attributes["data-original-width"].Value);
                i.Height    = Convert.ToInt32(imgData.Attributes["data-original-height"].Value);
                i.SampleUrl = imgData.Attributes["src"].Value;
            }

            HtmlNodeCollection liNodes = doc.DocumentNode.SelectNodes("//li");
            if (liNodes == null)
            {
                // SelectNodes yields null when the page has no <li> at all.
                return;
            }

            foreach (HtmlNode n in liNodes)
            {
                string text = n.InnerText;

                if (text.Contains("Posted"))
                {
                    // Same offsets as before, but only applied when both markers exist and the range is valid.
                    // NOTE(review): start points at the blank after "Posted:" and the length stops one
                    // character before " by" - confirm against a live page whether that is intended.
                    int start  = text.IndexOf("ed: ") + 3;
                    int length = text.IndexOf(" by") - text.IndexOf("d: ") - 3;
                    if (start >= 3 && length >= 0 && start + length <= text.Length)
                    {
                        i.Date = text.Substring(start, length);
                    }
                }

                if (n.InnerHtml.Contains("by"))
                {
                    // Everything after the last blank.
                    i.Author = text.Substring(text.LastIndexOf(' ') + 1);
                }

                if (text.Contains("Source"))
                {
                    // ".//" keeps the lookup inside this <li>; a leading "//" would search the whole document.
                    HtmlNode sourceLink = n.SelectSingleNode(".//*[@rel=\"nofollow\"]");
                    if (sourceLink != null && sourceLink.Attributes["href"] != null)
                    {
                        i.Source = sourceLink.Attributes["href"].Value;
                    }
                }

                if (text.Contains("Rating"))
                {
                    i.IsExplicit = !text.Contains("Safe");
                }

                if (text.Contains("Score"))
                {
                    HtmlNode scoreNode = n.SelectSingleNode("./span");
                    if (scoreNode != null)
                    {
                        i.Score = Convert.ToInt32(scoreNode.InnerText);
                    }
                }

                if (n.InnerHtml.Contains("Original"))
                {
                    HtmlNode originalLink = n.SelectSingleNode("./a");
                    if (originalLink != null && originalLink.Attributes["href"] != null)
                    {
                        i.OriginalUrl = originalLink.Attributes["href"].Value;
                        i.JpegUrl     = i.OriginalUrl;
                    }
                }
            }
        };

        list.Add(item);
    }

    return(list);
}
/// <summary>
/// Scans every <c>table.dataTable</c> of the page, feeds supported rows into <c>scap.myTable</c>
/// and stores the CVSS text of the first CVE table via <c>scap.set_Cvss</c>.
/// </summary>
/// <remarks>
/// A table is only considered when it has a <c>thead</c> with a <c>tr</c> that contains <c>th</c>
/// cells; tables without header cells are skipped. Each node selector is evaluated once.
/// </remarks>
/// <param name="page">Parsed HTML page to read the tables from.</param>
public override void readTablesFromWeb(HtmlDocument page)
{
    string cvss = "";

    if (page.DocumentNode != null)
    {
        HtmlNodeCollection tables = page.DocumentNode.SelectNodes("//table[@class='dataTable']");
        if (tables != null)
        {
            foreach (HtmlNode table in tables)
            {
                HtmlNode head = table.SelectSingleNode("thead");
                if (head == null)
                {
                    continue;
                }

                HtmlNode headRow = head.SelectSingleNode("tr");
                if (headRow == null)
                {
                    continue;
                }

                HtmlNodeCollection headCols = headRow.SelectNodes("th");
                if (headCols == null)
                {
                    // No <th> cells: nothing to classify, and Count would have thrown here.
                    continue;
                }

                // Precedence made explicit: a 4-column header is accepted as is, a 5-column header
                // must additionally pass isValidHeader.
                // NOTE(review): confirm that 4-column headers are meant to bypass isValidHeader.
                bool supportedHeader = (headCols.Count == 4)
                                       || ((headCols.Count == 5) && scap.myTable.isValidHeader(headCols));
                if (supportedHeader)
                {
                    scap.myTable.setHeaderSupporteTrue();
                    scap.myTable.setSystemOrComponent(headCols[0].InnerText);
                    readTableBodies(table);
                }

                HtmlNode caption = table.SelectSingleNode("caption");
                if (caption != null && scap.myTable.isCVEHeader(caption.InnerText))
                {
                    // Only the first CVE table contributes; stop scanning afterwards.
                    cvss += scap.myTable.getCVS(headCols);
                    break;
                }
            }
        }
    }

    scap.set_Cvss(cvss);
}

// Processes every <tbody> of a supported table: a leading <th> row names the section,
// otherwise each 4- or 5-cell row is handed to the matching row processor.
private void readTableBodies(HtmlNode table)
{
    HtmlNodeCollection bodies = table.SelectNodes("tbody");
    if (bodies == null)
    {
        return;
    }

    foreach (HtmlNode body in bodies)
    {
        HtmlNodeCollection rows = body.SelectNodes("tr");
        if (rows == null)
        {
            continue;
        }

        // we are assuming that there is gonna be only one <th> tag
        HtmlNode bodyHead = rows[0].SelectSingleNode("th");
        if (bodyHead != null)
        {
            scap.myTable.setSupportedHeader(bodyHead.InnerText);
            continue;
        }

        foreach (HtmlNode row in rows)
        {
            HtmlNodeCollection cols = row.SelectNodes("td");
            if (cols == null)
            {
                continue;
            }

            // These are the only rows that we are interested in
            if (cols.Count == 4)
            {
                processRowFourCols(cols);
            }
            if (cols.Count == 5)
            {
                processRowFiveCols(cols);
            }
        }
    }
}
//
//====================================================================================================
/// <summary>
/// Narrows the document to its importable body (unless it is an email template) and then runs the
/// import controllers over it. Progress and problem messages are appended to
/// <paramref name="userMessageList"/>.
/// </summary>
/// <param name="cp">Passed through to the controllers that require it.</param>
/// <param name="htmlDoc">The HtmlAgilityPack document; it is reloaded in place with the body markup.</param>
/// <param name="importTypeId">Import type; <c>EmailTemplate</c> keeps the full html document.</param>
/// <param name="userMessageList">Receives one message describing how the body was determined.</param>
/// <returns><c>false</c> when a body tag exists but is empty and no data-body attribute is present; otherwise <c>true</c>.</returns>
private static bool processHtmlDoc(CPBaseClass cp, HtmlDocument htmlDoc, ImporttypeEnum importTypeId, ref List <string> userMessageList)
{
    //
    // -- email templates use the full html document, everything else is reduced to its body
    bool reduceToBody = importTypeId != ImporttypeEnum.EmailTemplate;
    if (reduceToBody)
    {
        HtmlNodeCollection dataBodyNodes = htmlDoc.DocumentNode.SelectNodes("//*[@data-body]");
        if (dataBodyNodes == null)
        {
            //
            // -- no data-body attribute, fall back to the body tag
            HtmlNode bodyTag = htmlDoc.DocumentNode.SelectSingleNode("//body");
            if (bodyTag != null)
            {
                string bodyHtml = bodyTag.InnerHtml;
                if (string.IsNullOrWhiteSpace(bodyHtml))
                {
                    //
                    // -- body tag present but empty, nothing to import
                    userMessageList.Add("The content does not include a data-body attribute and the body tag is empty.");
                    return(false);
                }
                //
                // -- body found, set the htmlDoc to the body
                userMessageList.Add("Html Body imported.");
                htmlDoc.LoadHtml(bodyHtml);
            }
            else
            {
                //
                // -- no body found, use entire document
                userMessageList.Add("No Body found, entire document imported.");
            }
        }
        else
        {
            //
            // -- import the content of the first data-body element
            userMessageList.Add("Body set by data-body attribute.");
            htmlDoc.LoadHtml(dataBodyNodes.First().InnerHtml);
        }
    }
    //
    // -- process data-layout nodes. Within each node found, run all other controllers individually then save
    DataLayoutController.process(cp, htmlDoc, ref userMessageList);
    //
    // -- process the body
    DataDeleteController.process(htmlDoc);
    MustacheVariableController.process(htmlDoc);
    MustacheSectionController.process(htmlDoc);
    MustacheTruthyController.process(htmlDoc);
    MustacheInvertedSectionController.process(htmlDoc);
    MustacheValueController.process(htmlDoc);
    DataAddonController.process(cp, htmlDoc);
    DataHrefController.process(htmlDoc);
    DataSrcController.process(htmlDoc);
    //
    return(true);
}
/// <summary>
/// Visits every configured document page, derives a meeting date from the text of each
/// <c>/LinkClick.aspx</c> link and hands links dated on or after <c>dtStartFrom</c> to
/// <c>ExtractADoc</c>.
/// </summary>
/// <remarks>
/// Each entry of <c>docUrls</c> has the form <c>category*url</c>. Date patterns are tried in the
/// order listed below; the order matters because the month/day pattern also matches the start of a
/// full "Month d, yyyy" date.
/// </remarks>
public void DownloadCouncilPdfFiles()
{
    var docs = this.LoadDocumentsDoneSQL();
    var queries = this.LoadQueriesDoneSQL();
    HtmlWeb web = new HtmlWeb();

    // An ordered list instead of a Dictionary: the try-order is part of the logic and dictionary
    // enumeration order is not guaranteed.
    // Value " "    : free-form date, parsed with DateTime.TryParse.
    // Value "Month": month and day only; the year is read from the surrounding markup.
    // Anything else: exact format string for DateTime.TryParseExact.
    var datePatterns = new List <KeyValuePair <Regex, string> >
    {
        new KeyValuePair <Regex, string>(new Regex("[A-Za-z]+[\\s]{0,1}[0-9]{1,2},[\\s]{0,1}[0-9]{4}"), " "),
        new KeyValuePair <Regex, string>(new Regex("[A-Za-z]+[\\s]{0,1}[0-9]{1,2}"), "Month"),
        new KeyValuePair <Regex, string>(new Regex("[0-9]{2}-[0-9]{2}-[0-9]{2}"), "MM-dd-yy"),
        new KeyValuePair <Regex, string>(new Regex("[0-9]{2}-[0-9]{1}-[0-9]{2}"), "MM-d-yy"),
        new KeyValuePair <Regex, string>(new Regex("[0-9]{1}-[0-9]{2}-[0-9]{2}"), "M-dd-yy"),
        new KeyValuePair <Regex, string>(new Regex("[0-9]{1}-[0-9]{1}-[0-9]{2}"), "M-d-yy")
    };

    using (WebClient c = new WebClient())
    {
        foreach (string url in this.docUrls)
        {
            string[] parts = url.Split('*');
            if (parts.Length < 2)
            {
                // Not in "category*url" form; indexing [1] would throw.
                Console.WriteLine("doc url format incorrect: " + url);
                continue;
            }

            var category = parts[0];
            var subUrl = parts[1];

            HtmlDocument doc = web.Load(subUrl);
            HtmlNodeCollection list = doc.DocumentNode.SelectNodes("//a[contains(@href,'/LinkClick.aspx')]");
            if (list == null)
            {
                // SelectNodes yields null when the page has no matching link.
                continue;
            }

            foreach (var r in list)
            {
                var dateConvert = false;
                DateTime meetingDate = DateTime.MinValue;

                foreach (var pattern in datePatterns)
                {
                    string format = pattern.Value;
                    string meetingDateText = pattern.Key.Match(r.InnerText).ToString();

                    if (string.IsNullOrWhiteSpace(format))
                    {
                        if (DateTime.TryParse(meetingDateText, out meetingDate))
                        {
                            dateConvert = true;
                            break;
                        }
                        // A blank format can never match in TryParseExact, go on to the next pattern.
                        continue;
                    }

                    if (format == "Month")
                    {
                        if (!string.IsNullOrWhiteSpace(meetingDateText))
                        {
                            try
                            {
                                // The year is expected two siblings before the fifth ancestor of the link.
                                var year = r.ParentNode.ParentNode.ParentNode.ParentNode.ParentNode.PreviousSibling.PreviousSibling.InnerText;
                                var dateStr = meetingDateText + ", " + year;
                                if (DateTime.TryParse(dateStr, out meetingDate))
                                {
                                    dateConvert = true;
                                    break;
                                }
                            }
                            catch
                            {
                                // Best effort: the markup around the link did not have the expected shape.
                                continue;
                            }
                        }
                        // "Month" is a marker, not a usable exact format, go on to the next pattern.
                        continue;
                    }

                    if (DateTime.TryParseExact(meetingDateText, format, System.Globalization.CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.None, out meetingDate))
                    {
                        dateConvert = true;
                        break;
                    }
                }

                if (!dateConvert)
                {
                    Console.WriteLine(r.InnerText);
                    Console.WriteLine("date format incorrect...");
                    continue;
                }

                if (meetingDate < this.dtStartFrom)
                {
                    Console.WriteLine("Early...");
                    continue;
                }

                this.ExtractADoc(c, this.cityEntity.CityUrl + r.Attributes["href"].Value, category, "pdf", meetingDate, ref docs, ref queries);
            }
        }
    }

    Console.WriteLine("docs:" + docs.Count + "--- query:" + queries.Count);
}
/// <summary>
/// Checks that <c>GetByName</c> counts matching elements, with and without searching child nodes.
/// </summary>
public void GetByNameTest()
{
    HtmlElement root = new HtmlElement("root");
    HtmlNodeCollection target = new HtmlNodeCollection(root);
    target.Add(new HtmlElement("first"));
    target.Add(new HtmlElement("second"));
    target.Add(new HtmlElement("second"));

    // Expected value first, so a failure message reports expected/actual the right way round.
    Assert.AreEqual(2, target.GetByName("second").Count);

    // A nested "second" element is only found when child nodes are searched.
    ((HtmlElement)target[0]).Nodes.Add(new HtmlElement("second"));
    Assert.AreEqual(2, target.GetByName("second", false).Count);
    Assert.AreEqual(3, target.GetByName("second").Count);
}
/// <summary>
/// Form load handler: hides the two group boxes, shows the welcome text when
/// <c>Data.Login_1</c> is false and fills the status strip with weather data scraped from tianqi.com.
/// </summary>
/// <remarks>
/// Any failure while loading the weather is logged, reported in a message box and hides the status strip.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Main_Load(object sender, EventArgs e)
{
    groupBox1.Hide();
    groupBox2.Hide();
    if (!Data.Login_1)
    {
        label1.Text = "欢迎回来!";
    }
    dataGridView1.AutoSizeRowsMode = DataGridViewAutoSizeRowsMode.AllCellsExceptHeaders;

    #region Load weather
    try
    {
        HtmlWeb htmlWeb = new HtmlWeb();
        HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load("http://i.tianqi.com/index.php?c=code&id=34&icon=1&num=2");

        // Show the location
        HtmlNodeCollection htmlNodes = doc.DocumentNode.SelectNodes("//a");
        toolStripStatusLabel2.Text = htmlNodes[0].InnerText.Trim();

        // Insert the weather icon
        htmlNodes = doc.DocumentNode.SelectNodes("//*[@id='mobile280']/div/a[2]/span[1]/img");
        string url = "http:" + htmlNodes[0].Attributes["src"].Value;

        System.Net.WebRequest webreq = System.Net.WebRequest.Create(url);
        using (System.Net.WebResponse webres = webreq.GetResponse())
        using (System.IO.Stream stream = webres.GetResponseStream())
        using (Image downloaded = Image.FromStream(stream))
        {
            // An Image created from a stream needs that stream for its whole lifetime, so the label
            // gets an independent copy that stays valid after the response is closed.
            toolStripStatusLabel3.Image = new Bitmap(downloaded);
        }

        // Collect the weather description
        htmlNodes = doc.DocumentNode.SelectNodes("//span");
        String tianqi = "|温度:";
        foreach (var iteam in htmlNodes)
        {
            tianqi += iteam.InnerText.Trim();
        }

        doc = htmlWeb.Load("http://www.tianqi.com/");
        htmlNodes = doc.DocumentNode.SelectNodes("//p[@class='p_2']");
        tianqi += "|" + htmlNodes[0].InnerText;
        htmlNodes = doc.DocumentNode.SelectNodes("//p[@class='p_3']");
        tianqi += "|" + htmlNodes[0].InnerText;
        htmlNodes = doc.DocumentNode.SelectNodes("//p[@class='p_1']");
        tianqi = htmlNodes[0].InnerText + tianqi;
        toolStripStatusLabel4.Text = tianqi + "|";
    }
    catch (Exception e1)
    {
        // Covers network errors as well as a changed page layout (SelectNodes returning null).
        MessageBox.Show("天气加载失败!", "提示");
        Data.WriteLog("天气加载失败!" + e1.Message.ToString(), 1);
        statusStrip1.Hide();
    }
    #endregion
}
/// <summary>
/// Checks that <c>Insert</c> places an element at the requested index and sets its parent,
/// then inserts <c>null</c>.
/// </summary>
public void InsertTest()
{
    HtmlElement root = new HtmlElement("root");
    HtmlNodeCollection target = new HtmlNodeCollection(root);
    HtmlElement child = new HtmlElement("child");
    target.Add(child);

    child = new HtmlElement("second");
    target.Insert(0, child);

    Assert.AreEqual(root, child.Parent);
    // Expected value first, so a failure message reports expected/actual the right way round.
    Assert.AreEqual(0, target.IndexOf(child));

    // NOTE(review): presumably expected to throw (attribute not visible in this chunk) - confirm.
    target.Insert(0, null);
}
/// <summary>
/// Appends every node of the given list, in list order, to the children of this node.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="newChildren"/> is null.</exception>
public void AppendChildren(HtmlNodeCollection newChildren)
{
    if (newChildren != null)
    {
        foreach (HtmlNode child in newChildren)
        {
            AppendChild(child);
        }
        return;
    }

    throw new ArgumentNullException("newChildren");
}
/// <summary>
/// Checks the name indexer: an existing element name yields a node, an unknown name yields null.
/// </summary>
public void ItemByNameTest()
{
    HtmlNodeCollection target = new HtmlNodeCollection(new HtmlElement("root"));
    foreach (string elementName in new[] { "first", "second", "second" })
    {
        target.Add(new HtmlElement(elementName));
    }

    Assert.IsNotNull(target["second"]);
    Assert.IsNull(target["anyname"]);
}
// Walks the collection recursively: attributes of every node are entitized, container nodes are
// descended into, and childless text nodes get their text replaced by its entitized form.
private static void Entitize(HtmlNodeCollection collection)
{
    foreach (HtmlNode current in collection)
    {
        if (current.HasAttributes)
        {
            Entitize(current.Attributes);
        }

        if (current.HasChildNodes)
        {
            Entitize(current.ChildNodes);
            continue;
        }

        if (current.NodeType != HtmlNodeType.Text)
        {
            continue;
        }

        HtmlTextNode textNode = (HtmlTextNode)current;
        textNode.Text = Entitize(textNode.Text, true, true);
    }
}
/// <summary>
/// Parses an event listing page, stores every event that has win/draw/lose odds through
/// <c>Match100Helper.insert_data</c> and <c>MbSQL.insert_events</c>, and returns a text table of
/// the stored events.
/// </summary>
/// <remarks>
/// Server time and time zone prefix are read from the JSON-like payload of the second script tag in
/// the page head. Event start times are built from the server date and shifted by the negated zone.
/// Containers without event tables are skipped, as in the sibling parsers, and an event whose date
/// cell has an unrecognised length is skipped instead of silently reusing the previous start time.
/// </remarks>
/// <param name="html">Raw page markup.</param>
/// <returns>One formatted line per stored event.</returns>
public string insert_event_data_new(string html)
{
    StringBuilder sb = new StringBuilder();
    html = html.Replace("<thead=\"\"", "");
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);
    HtmlNodeCollection nodes_all = doc.DocumentNode.SelectNodes(@"//*");

    // Server clock and zone come from the init script in the page head.
    HtmlNode node_timer = doc.DocumentNode.SELECT_NODE("/html[1]/head[1]/script[2]");
    BsonDocument doc_timer = MongoHelper.get_doc_from_str(node_timer.InnerText.Replace("//<![CDATA[", "").Replace("//]]>>", "").Replace("initData =", ""));
    string server_time = doc_timer["serverTime"].ToString().E_TRIM();
    string zone_gmt = doc_timer["tzPrefix"].ToString().E_TRIM();
    zone_gmt = zone_gmt.Replace("GMT", "");
    string zone = string.IsNullOrEmpty(zone_gmt) ? "0" : zone_gmt;

    // serverTime is split on ',' into year, month, day, hour, minute, second.
    string[] times = server_time.E_SPLIT(",");
    DateTime dt_server = new DateTime(Convert.ToInt16(times[0]), Convert.ToInt16(times[1]), Convert.ToInt16(times[2]), Convert.ToInt16(times[3]), Convert.ToInt16(times[4]), Convert.ToInt16(times[5]));

    if (nodes_all == null)
    {
        return(sb.ToString());
    }

    foreach (HtmlNode node in nodes_all)
    {
        if (node.Id != "container_EVENTS")
        {
            continue;
        }

        HtmlNodeCollection nodes_div = node.SELECT_NODES("/div");
        if (nodes_div == null)
        {
            continue;
        }

        foreach (HtmlNode node_div in nodes_div)
        {
            if (!node_div.Id.Contains("container"))
            {
                continue;
            }

            string league = node_div.SELECT_NODE("div[1]/h2[1]").InnerText;
            HtmlNodeCollection nodes_table = node_div.SELECT_NODES("div[2]/div[1]/table[1]/tbody");
            if (nodes_table == null)
            {
                continue;
            }

            foreach (HtmlNode node_table in nodes_table)
            {
                if (!node_table.Id.Contains("event"))
                {
                    continue;
                }

                string event_id = node_table.Id.Replace("event_", "");
                string date = node_table.SELECT_NODE("/tr[1]/td[1]/table[1]/tr[1]/td[2]").InnerText.E_TRIM();
                // NOTE(review): the year literal is hard-coded - confirm whether other years need stripping too.
                date = date.Replace("2015", "");

                string start_time;
                if (date.Length == 10)
                {
                    // day, English month abbreviation and time packed into ten characters
                    start_time = dt_server.ToString("yyyy-") + Tool.get_12m_from_eng(date.Substring(2, 3)) + "-" + date.Substring(0, 2) + " " + date.Substring(5, 5);
                }
                else if (date.Length == 5)
                {
                    // time only: the event takes place on the server's current date
                    start_time = dt_server.ToString("yyyy-MM-dd") + " " + date;
                }
                else
                {
                    // Unknown date layout: skip rather than convert a stale or empty start time.
                    continue;
                }

                DateTime dt_time = Convert.ToDateTime(start_time);
                dt_time = dt_time.AddHours(Convert.ToInt16(zone) * (-1));

                string host = node_table.SELECT_NODE("/tr[1]/td[1]/table[1]/tr[1]/td[1]/span[1]/div[1]").InnerText;
                string client = node_table.SELECT_NODE("/tr[1]/td[1]/table[1]/tr[1]/td[1]/span[1]/div[2]").InnerText;
                string win = node_table.SELECT_NODE("/tr[1]/td[2]").InnerText.E_REMOVE();
                string draw = node_table.SELECT_NODE("/tr[1]/td[3]").InnerText.E_REMOVE();
                string lose = node_table.SELECT_NODE("/tr[1]/td[4]").InnerText.E_REMOVE();

                // Odds containing '/' are passed through the English-odds converter.
                if (win.Contains("/"))
                {
                    win = AnalyseTool.convert_english_odd(win);
                    draw = AnalyseTool.convert_english_odd(draw);
                    lose = AnalyseTool.convert_english_odd(lose);
                }

                if (!string.IsNullOrEmpty(win.E_TRIM()) && !string.IsNullOrEmpty(draw.E_TRIM()) && !string.IsNullOrEmpty(lose.E_TRIM()))
                {
                    sb.AppendLine(event_id.PR(10) + league.PR(50) + dt_time.ToString("yyyy-MM-dd HH:mm:ss").PR(20) + host.PR(30) + client.PR(30) + win.PR(10) + draw.PR(10) + lose.PR(10));
                    Match100Helper.insert_data("marathonbet", league, start_time, host, client, win, draw, lose, "0", zone);
                    MbSQL.insert_events(event_id, league.E_TRIM(), dt_time.ToString("yyyy-MM-dd HH:mm:ss"), host, client);
                }
            }
        }
    }

    return(sb.ToString());
}
/// <summary>
/// Creates a new document object whose nodes are produced by parsing the given stream.
/// </summary>
/// <param name="stream">The stream handed to <c>HtmlParser.Parse</c>.</param>
private HtmlDocument(Stream stream)
{
    nodes = HtmlParser.Parse(stream);
}
/// <summary>
/// Selects a list of nodes matching the <see cref="XPath"/> expression.
/// </summary>
/// <param name="xpath">The XPath expression.</param>
/// <param name="xmgr">The namespace manager passed to the navigator together with the expression.</param>
/// <returns>
/// An <see cref="HtmlNodeCollection"/> containing the nodes matching the <see cref="XPath"/> query.
/// The collection is empty, not <c>null</c>, when nothing matched.
/// </returns>
public HtmlNodeCollection SelectNodes(string xpath, XmlNamespaceManager xmgr)
{
    HtmlNodeCollection list = new HtmlNodeCollection(null);

    HtmlNodeNavigator nav = new HtmlNodeNavigator(_ownerdocument, this);
    XPathNodeIterator it = nav.Select(xpath, xmgr);
    while (it.MoveNext())
    {
        HtmlNodeNavigator n = (HtmlNodeNavigator)it.Current;
        list.Add(n.CurrentNode);
    }

    // The former "Count == 0" branch returned the same list and has been dropped.
    return list;
}
/// <summary>
/// Parses an event page including its market details, stores every event through
/// <c>MbSQL.insert_events</c> and the recognised handicap / total-goals markets through
/// <c>MbSQL.insert_odds</c>, and returns a text dump of what was read.
/// </summary>
/// <remarks>
/// Node collections that come back null (no matching markup) are skipped instead of being enumerated.
/// The start time text is kept from the previous event when the date cell has an unrecognised length,
/// as before.
/// </remarks>
/// <param name="html">Raw page markup.</param>
/// <returns>A formatted listing of events and their market rows.</returns>
public string get_event_with_detail(string html)
{
    const string wide_rule = "------------------------------------------------------------------------------------";
    const string narrow_rule = "--------------------";

    StringBuilder sb = new StringBuilder();
    html = html.Replace("<thead=\"\"", "");
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);
    HtmlNodeCollection nodes_all = doc.DocumentNode.SelectNodes(@"//*");
    if (nodes_all == null)
    {
        return(sb.ToString());
    }

    string start_time = "";
    // zone is only consumed by the disabled Match100Helper.insert_data call further down.
    string zone = "8";
    DateTime dt_time = DateTime.Now;

    foreach (HtmlNode node in nodes_all)
    {
        if (node.Id == "timer")
        {
            //02.09.14, 14:47 (GMT+1)
            string timer = node.InnerText.E_TRIM();
            DateTime dt_timer = new DateTime(Convert.ToInt16("20" + timer.Substring(6, 2)), Convert.ToInt16(timer.Substring(3, 2)), Convert.ToInt16(timer.Substring(0, 2)), Convert.ToInt16(timer.Substring(9, 2)), Convert.ToInt16(timer.Substring(12, 2)), 0);
            TimeSpan span = DateTime.Now - dt_timer;
            zone = (8 - Math.Round(span.TotalHours)).ToString();
        }

        if (node.Id != "container_EVENTS")
        {
            continue;
        }

        HtmlNodeCollection nodes_div = node.SELECT_NODES("/div");
        if (nodes_div == null)
        {
            continue;
        }

        foreach (HtmlNode node_div in nodes_div)
        {
            if (!node_div.Id.Contains("container"))
            {
                continue;
            }

            string league = node_div.SELECT_NODE("div[1]/h2[1]").InnerText.E_REMOVE();
            HtmlNodeCollection nodes_table = node_div.SELECT_NODES("div[2]/div[1]/table[1]/tbody");
            if (nodes_table == null)
            {
                continue;
            }

            foreach (HtmlNode node_table in nodes_table)
            {
                if (!node_table.Id.Contains("event"))
                {
                    continue;
                }

                string event_id = node_table.Id.Replace("event_", "");
                string date = node_table.SELECT_NODE("/tr[1]/td[1]/table[1]/tbody[1]/tr[1]/td[2]").InnerText.E_TRIM();
                // NOTE(review): the year literal is hard-coded - confirm whether other years need stripping too.
                date = date.Replace("2015", "");
                if (date.Length == 10)
                {
                    start_time = Tool.get_12m_from_eng(date.Substring(2, 3)) + "-" + date.Substring(0, 2) + "●" + date.Substring(5, 5);
                }
                if (date.Length == 5)
                {
                    start_time = dt_time.ToString("MM-dd") + "●" + date;
                }

                string host = node_table.SELECT_NODE("/tr[1]/td[1]/table[1]/tbody[1]/tr[1]/td[1]/span[1]/div[1]").InnerText.E_REMOVE();
                string client = node_table.SELECT_NODE("/tr[1]/td[1]/table[1]/tbody[1]/tr[1]/td[1]/span[1]/div[2]").InnerText.E_REMOVE();
                string win = node_table.SELECT_NODE("/tr[1]/td[2]").InnerText.E_REMOVE();
                string draw = node_table.SELECT_NODE("/tr[1]/td[3]").InnerText.E_REMOVE();
                string lose = node_table.SELECT_NODE("/tr[1]/td[4]").InnerText.E_REMOVE();

                // Odds containing '/' are passed through the English-odds converter.
                if (win.Contains("/"))
                {
                    win = AnalyseTool.convert_english_odd(win);
                    draw = AnalyseTool.convert_english_odd(draw);
                    lose = AnalyseTool.convert_english_odd(lose);
                }

                if (!string.IsNullOrEmpty(win.E_TRIM()) && !string.IsNullOrEmpty(draw.E_TRIM()) && !string.IsNullOrEmpty(lose.E_TRIM()))
                {
                    sb.AppendLine(league.PR(50) + start_time.PR(20) + host.PR(30) + client.PR(30) + win.PR(10) + draw.PR(10) + lose.PR(10));
                    //Match100Helper.insert_data("marathonbet", league, start_time, host, client, win, draw, lose, "1", zone);
                }

                // NOTE(review): the stored time is the moment this method started, not the event's
                // start time - confirm that this is intended.
                MbSQL.insert_events(event_id, league, dt_time.ToString("yyyy-MM-dd HH:mm:ss"), host, client);

                //get the detail information
                HtmlNodeCollection nodes_tr = node_table.SELECT_NODES("tr");
                if (nodes_tr == null)
                {
                    continue;
                }

                foreach (HtmlNode node_tr in nodes_tr)
                {
                    if (!node_tr.CLASS().Contains("market-details"))
                    {
                        continue;
                    }

                    sb.AppendLine(wide_rule);

                    //block-market-wrapper
                    HtmlNodeCollection nodes_block = node_tr.SELECT_NODES("/td[1]/div[2]/div[1]/div");
                    if (nodes_block != null)
                    {
                        foreach (HtmlNode node_block in nodes_block)
                        {
                            HtmlNodeCollection nodes_div1 = node_block.SELECT_NODES("/div[4]/div");
                            if (nodes_div1 == null)
                            {
                                continue;
                            }

                            foreach (HtmlNode node_div1 in nodes_div1)
                            {
                                string odd_type = node_div1.SELECT_NODE("/div[1]").InnerText.E_REMOVE();
                                sb.AppendLine(odd_type);
                                sb.AppendLine(narrow_rule);

                                HtmlNodeCollection nodes_tr1 = node_div1.SELECT_NODES("/table[1]/tbody[1]/tr");
                                if (nodes_tr1 != null)
                                {
                                    foreach (HtmlNode node_tr1 in nodes_tr1)
                                    {
                                        ArrayList list = read_market_row(node_tr1, sb);
                                        if (list.Count >= 4)
                                        {
                                            store_market_row(event_id, odd_type, host, client, list);
                                        }
                                    }
                                }

                                sb.AppendLine(narrow_rule);
                            }
                        }
                    }

                    sb.AppendLine(wide_rule);
                }
            }
        }
    }

    return(sb.ToString());
}

// Appends the cell texts of one market row to sb and returns them with "(", ")" and "+" removed.
private ArrayList read_market_row(HtmlNode node_tr1, StringBuilder sb)
{
    ArrayList list = new ArrayList();
    HtmlNodeCollection nodes_td1 = node_tr1.SELECT_NODES("/td");
    if (nodes_td1 == null)
    {
        return list;
    }

    for (int i = 0; i < nodes_td1.Count; i++)
    {
        HtmlNodeCollection nodes_div2 = nodes_td1[i].SELECT_NODES("/div[1]/div");
        if (nodes_div2 == null)
        {
            continue;
        }

        for (int j = 0; j < nodes_div2.Count; j++)
        {
            sb.Append(nodes_div2[j].InnerText.E_REMOVE().PR(10));
            list.Add(nodes_div2[j].InnerText.E_REMOVE().Replace("(", "").Replace(")", "").Replace("+", ""));
        }
    }
    sb.Append(M.N);
    return list;
}

// Stores one four-value market row for every market rule it matches; the rules are independent,
// so a row can be stored more than once.
private void store_market_row(string event_id, string odd_type, string host, string client, ArrayList list)
{
    if (odd_type == "To Win Match With Handicap")
    {
        insert_two_way_odds(event_id, "2", "To Win Match With Handicap", "FULL", "", list);
    }
    if (odd_type == "To Win 1st Half With Handicap")
    {
        insert_two_way_odds(event_id, "2", "To Win 1st Half With Handicap", "1-HALF", "", list);
    }
    if (odd_type == "To Win 2nd Half With Handicap")
    {
        insert_two_way_odds(event_id, "2", "To Win 2nd Half With Handicap", "2-HALF", "", list);
    }
    if (odd_type == "Total Goals")
    {
        insert_two_way_odds(event_id, "3", "Total Goals", "FULL", "", list);
    }
    if (odd_type == "Total Goals - 1st Half")
    {
        insert_two_way_odds(event_id, "3", "Total Goals - 1st Half", "1-HALF", "", list);
    }
    if (odd_type == "Total Goals - 2nd Half")
    {
        insert_two_way_odds(event_id, "3", "Total Goals - 2nd Half", "2-HALF", "", list);
    }
    if (odd_type.Contains("Total Goals") && odd_type.Contains(host) && !odd_type.Contains("+"))
    {
        insert_two_way_odds(event_id, "4", "Total Goals", "FULL", "HOME", list);
    }
    if (odd_type.Contains("Total Goals") && odd_type.Contains(client) && !odd_type.Contains("+"))
    {
        insert_two_way_odds(event_id, "4", "Total Goals", "FULL", "AWAY", list);
    }
}

// Single place for the 21-argument insert_odds call: values 0 and 2 of the row go to the tenth and
// eleventh arguments, values 1 and 3 to the sixteenth and seventeenth; all other slots stay empty.
private void insert_two_way_odds(string event_id, string type_id, string type_name, string period, string side, ArrayList list)
{
    MbSQL.insert_odds(event_id, type_id, type_name, period, side, "", "", "", "", list[0].ToString(), list[2].ToString(), "", "", "", "", list[1].ToString(), list[3].ToString(), "", "", "", "");
}
/// <summary>
/// Parses an events page and hands every event that has complete win/draw/lose odds to
/// <c>Match100Helper.insert_data</c> under the "marathonbet" source name.
/// </summary>
/// <param name="html">Raw HTML of the events page.</param>
/// <returns>
/// A fixed-width text listing with one line per event that was passed on; an empty string when the
/// document yields no nodes.
/// </returns>
public string insert_event_data(string html)
{
    StringBuilder sb = new StringBuilder();

    // Drop the literal `<thead=""` fragment before handing the markup to the parser.
    html = html.Replace("<thead=\"\"", "");
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    HtmlNodeCollection nodes_all = doc.DocumentNode.SelectNodes(@"//*");
    if (nodes_all == null)
    {
        // SelectNodes yields null rather than an empty collection when nothing matches.
        return sb.ToString();
    }

    // These are declared outside the loops on purpose: a value that is not refreshed for an event
    // (e.g. start_time when the date text has an unexpected length) keeps its previous content.
    string league = "";
    string start_time = "";
    string host = "";
    string client = "";
    string win = "";
    string draw = "";
    string lose = "";
    string date = "";
    string zone = "";
    DateTime dt_time = DateTime.Now;

    foreach (HtmlNode node in nodes_all)
    {
        // NOTE(review): zone is reset to "8" for every node, so the offset computed from the
        // "timer" node below is overwritten before any event is inserted. If the timer-derived
        // zone is meant to be used, this reset probably belongs above the loop - confirm.
        zone = "8";

        if (node.Id == "timer")
        {
            // Sample text noted by the original author: 02.09.14, 14:47 (GMT+1)
            // The fixed offsets presumably rely on E_TRIM stripping inner whitespace - TODO confirm.
            string timer = node.InnerText.E_TRIM();
            DateTime dt_timer = new DateTime(
                Convert.ToInt16("20" + timer.Substring(6, 2)),
                Convert.ToInt16(timer.Substring(3, 2)),
                Convert.ToInt16(timer.Substring(0, 2)),
                Convert.ToInt16(timer.Substring(9, 2)),
                Convert.ToInt16(timer.Substring(12, 2)),
                0);
            TimeSpan span = DateTime.Now - dt_timer;
            zone = (8 - Math.Round(span.TotalHours)).ToString();
        }

        if (node.Id != "container_EVENTS")
        {
            continue;
        }

        HtmlNodeCollection nodes_div = node.SELECT_NODES("/div");
        if (nodes_div == null)
        {
            // Same null convention as the tbody lookup below: nothing to iterate.
            continue;
        }

        foreach (HtmlNode node_div in nodes_div)
        {
            if (!node_div.Id.Contains("container"))
            {
                continue;
            }

            league = node_div.SELECT_NODE("div[1]/h2[1]").InnerText;

            HtmlNodeCollection nodes_table = node_div.SELECT_NODES("div[2]/div[1]/table[1]/tbody");
            if (nodes_table == null)
            {
                continue;
            }

            foreach (HtmlNode node_table in nodes_table)
            {
                if (!node_table.Id.Contains("event"))
                {
                    continue;
                }

                date = node_table.SELECT_NODE("/tr[1]/td[1]/table[1]/tbody[1]/tr[1]/td[2]").InnerText.E_TRIM();
                // NOTE(review): the year literal is hard-coded; dates carrying another year keep it.
                date = date.Replace("2015", "");
                if (date.Length == 10)
                {
                    // Ten characters: two-digit day, three-letter month, then a five-character time.
                    start_time = Tool.get_12m_from_eng(date.Substring(2, 3)) + "-" + date.Substring(0, 2) + "●" + date.Substring(5, 5);
                }
                if (date.Length == 5)
                {
                    // Time only: combine it with the date captured when parsing started.
                    start_time = dt_time.ToString("MM-dd") + "●" + date;
                }

                host = node_table.SELECT_NODE("/tr[1]/td[1]/table[1]/tbody[1]/tr[1]/td[1]/span[1]/div[1]").InnerText;
                client = node_table.SELECT_NODE("/tr[1]/td[1]/table[1]/tbody[1]/tr[1]/td[1]/span[1]/div[2]").InnerText;
                win = node_table.SELECT_NODE("/tr[1]/td[2]").InnerText.E_REMOVE();
                draw = node_table.SELECT_NODE("/tr[1]/td[3]").InnerText.E_REMOVE();
                lose = node_table.SELECT_NODE("/tr[1]/td[4]").InnerText.E_REMOVE();

                if (win.Contains("/"))
                {
                    // A slash in the first price switches all three through convert_english_odd.
                    win = AnalyseTool.convert_english_odd(win);
                    draw = AnalyseTool.convert_english_odd(draw);
                    lose = AnalyseTool.convert_english_odd(lose);
                }

                bool hasAllOdds = !string.IsNullOrEmpty(win.E_TRIM())
                    && !string.IsNullOrEmpty(draw.E_TRIM())
                    && !string.IsNullOrEmpty(lose.E_TRIM());
                if (hasAllOdds)
                {
                    sb.AppendLine(league.PR(50) + start_time.PR(20) + host.PR(30) + client.PR(30) + win.PR(10) + draw.PR(10) + lose.PR(10));
                    Match100Helper.insert_data("marathonbet", league, start_time, host, client, win, draw, lose, "1", zone);
                }
            }
        }
    }

    return sb.ToString();
}
/// <summary>
/// Walks every badge row of one badge page and records the games that may still drop cards.
/// </summary>
/// <remarks>
/// Rows that report remaining drops are added to <c>GamesToFarm</c> directly. Rows that report none,
/// belong to <c>UntrustedAppIDs</c> and show no cards earned are passed to <c>CheckGame</c>; that call
/// is awaited inline in <c>MinMemoryUsage</c> mode and otherwise awaited together with the others
/// once the whole page has been processed.
/// </remarks>
/// <param name="htmlDocument">Parsed badge page; a null value is logged and ignored.</param>
private async Task CheckPage(HtmlDocument htmlDocument)
{
    if (htmlDocument == null)
    {
        Bot.ArchiLogger.LogNullError(nameof(htmlDocument));
        return;
    }

    HtmlNodeCollection badgeRows = htmlDocument.DocumentNode.SelectNodes("//div[@class='badge_row_inner']");
    if (badgeRows == null)
    {
        // The page lists no badge rows at all
        return;
    }

    HashSet<Task> pendingChecks = new HashSet<Task>();

    foreach (HtmlNode badgeRow in badgeRows)
    {
        HtmlNode statsContent = badgeRow.SelectSingleNode(".//div[@class='badge_title_stats_content']");
        HtmlNode dropInfoNode = statsContent?.SelectSingleNode(".//div[@class='card_drop_info_dialog']");
        if (dropInfoNode == null)
        {
            // A plain badge without card drop information
            continue;
        }

        // --- App ID: fifth underscore-separated part of the dialog's id attribute ---
        string appIDText = dropInfoNode.GetAttributeValue("id", null);
        if (string.IsNullOrEmpty(appIDText))
        {
            Bot.ArchiLogger.LogNullError(nameof(appIDText));
            continue;
        }

        string[] appIDSplitted = appIDText.Split('_');
        if (appIDSplitted.Length < 5)
        {
            Bot.ArchiLogger.LogNullError(nameof(appIDSplitted));
            continue;
        }

        appIDText = appIDSplitted[4];

        if (!uint.TryParse(appIDText, out uint appID) || (appID == 0))
        {
            Bot.ArchiLogger.LogNullError(nameof(appID));
            continue;
        }

        // Configuration says this appID must not be idled
        if (GlobalConfig.SalesBlacklist.Contains(appID) || Program.GlobalConfig.Blacklist.Contains(appID) || Bot.IsBlacklistedFromIdling(appID) || (Bot.BotConfig.IdlePriorityQueueOnly && !Bot.IsPriorityIdling(appID)))
        {
            continue;
        }

        if (IgnoredAppIDs.TryGetValue(appID, out DateTime ignoreExpiry))
        {
            if (ignoreExpiry >= DateTime.UtcNow)
            {
                // The ignore period has not elapsed yet
                continue;
            }

            // The ignore period is over, forget the entry and carry on
            IgnoredAppIDs.TryRemove(appID, out _);
        }

        // --- Cards remaining ---
        HtmlNode progressNode = statsContent.SelectSingleNode(".//span[@class='progress_info_bold']");
        if (progressNode == null)
        {
            Bot.ArchiLogger.LogNullError(nameof(progressNode));
            continue;
        }

        string progressText = progressNode.InnerText;
        if (string.IsNullOrEmpty(progressText))
        {
            Bot.ArchiLogger.LogNullError(nameof(progressText));
            continue;
        }

        ushort cardsRemaining = 0;
        Match remainingMatch = Regex.Match(progressText, @"\d+");

        // No number is printed when no drops remain, so a failed match is expected and leaves 0
        if (remainingMatch.Success && (!ushort.TryParse(remainingMatch.Value, out cardsRemaining) || (cardsRemaining == 0)))
        {
            Bot.ArchiLogger.LogNullError(nameof(cardsRemaining));
            continue;
        }

        if (cardsRemaining == 0)
        {
            // The badge page can report zero drops while the game page says otherwise.
            // Checking every game would be far too expensive, so only appIDs known to be
            // unreliable get the closer look below.
            if (!UntrustedAppIDs.Contains(appID))
            {
                continue;
            }

            // Cheap pre-check: the number of cards earned so far
            HtmlNode cardsEarnedNode = statsContent.SelectSingleNode(".//div[@class='card_drop_info_header']");
            if (cardsEarnedNode == null)
            {
                Bot.ArchiLogger.LogNullError(nameof(cardsEarnedNode));
                continue;
            }

            string cardsEarnedText = cardsEarnedNode.InnerText;
            if (string.IsNullOrEmpty(cardsEarnedText))
            {
                Bot.ArchiLogger.LogNullError(nameof(cardsEarnedText));
                continue;
            }

            Match cardsEarnedMatch = Regex.Match(cardsEarnedText, @"\d+");
            if (!cardsEarnedMatch.Success)
            {
                Bot.ArchiLogger.LogNullError(nameof(cardsEarnedMatch));
                continue;
            }

            if (!ushort.TryParse(cardsEarnedMatch.Value, out ushort cardsEarned))
            {
                Bot.ArchiLogger.LogNullError(nameof(cardsEarned));
                continue;
            }

            if (cardsEarned > 0)
            {
                // Cards were already earned and none remain: treat the game as finished
                continue;
            }

            // Nothing remaining and nothing earned: an unowned game with cards in the inventory,
            // an F2P game without purchases, or wrong data from Steam. Only the individual game
            // check can tell, so parsing continues.
        }

        // --- Hours played ---
        HtmlNode timeNode = statsContent.SelectSingleNode(".//div[@class='badge_title_stats_playtime']");
        if (timeNode == null)
        {
            Bot.ArchiLogger.LogNullError(nameof(timeNode));
            continue;
        }

        string hoursText = timeNode.InnerText;
        if (string.IsNullOrEmpty(hoursText))
        {
            Bot.ArchiLogger.LogNullError(nameof(hoursText));
            continue;
        }

        float hours = 0.0F;
        Match playtimeMatch = Regex.Match(hoursText, @"[0-9\.,]+");

        // Exactly 0.0 hours is not printed, so a failed match is expected and leaves 0
        if (playtimeMatch.Success && (!float.TryParse(playtimeMatch.Value, NumberStyles.Number, CultureInfo.InvariantCulture, out hours) || (hours <= 0.0F)))
        {
            Bot.ArchiLogger.LogNullError(nameof(hours));
            continue;
        }

        // --- Game name ---
        HtmlNode nameNode = statsContent.SelectSingleNode("(.//div[@class='card_drop_info_body'])[last()]");
        if (nameNode == null)
        {
            Bot.ArchiLogger.LogNullError(nameof(nameNode));
            continue;
        }

        string name = nameNode.InnerText;
        if (string.IsNullOrEmpty(name))
        {
            Bot.ArchiLogger.LogNullError(nameof(name));
            continue;
        }

        // Two sentence shapes are recognised: the regular one, and the "no drops remaining" one
        int nameStartIndex = name.IndexOf(" by playing ", StringComparison.Ordinal);
        if (nameStartIndex <= 0)
        {
            nameStartIndex = name.IndexOf("You don't have any more drops remaining for ", StringComparison.Ordinal);
            if (nameStartIndex <= 0)
            {
                Bot.ArchiLogger.LogNullError(nameof(nameStartIndex));
                continue;
            }

            // Together with the 12 added below this skips the whole 44-character marker
            nameStartIndex += 32;
        }

        // Length of " by playing "
        nameStartIndex += 12;

        int nameEndIndex = name.LastIndexOf('.');
        if (nameEndIndex <= nameStartIndex)
        {
            Bot.ArchiLogger.LogNullError(nameof(nameEndIndex));
            continue;
        }

        name = WebUtility.HtmlDecode(name.Substring(nameStartIndex, nameEndIndex - nameStartIndex));

        // --- Badge level ---
        byte badgeLevel = 0;

        HtmlNode levelLine = badgeRow.SelectSingleNode(".//div[@class='badge_info_description']/div[2]");
        if (levelLine != null)
        {
            // The node only exists once the badge has been crafted; its absence means level 0
            string levelText = levelLine.InnerText;
            if (string.IsNullOrEmpty(levelText))
            {
                Bot.ArchiLogger.LogNullError(nameof(levelText));
                continue;
            }

            int levelIndex = levelText.IndexOf("Level ", StringComparison.OrdinalIgnoreCase);
            if (levelIndex < 0)
            {
                Bot.ArchiLogger.LogNullError(nameof(levelIndex));
                continue;
            }

            levelIndex += 6;
            if (levelText.Length <= levelIndex)
            {
                Bot.ArchiLogger.LogNullError(nameof(levelIndex));
                continue;
            }

            // Only the single character right after "Level " is read
            levelText = levelText.Substring(levelIndex, 1);
            if (!byte.TryParse(levelText, out badgeLevel) || (badgeLevel == 0) || (badgeLevel > 5))
            {
                Bot.ArchiLogger.LogNullError(nameof(badgeLevel));
                continue;
            }
        }

        // Parsing is complete. Either the row gave usable data (cardsRemaining > 0), or the
        // reported zero is not trusted and the game has to be checked on its own.
        if (cardsRemaining > 0)
        {
            GamesToFarm.Add(new Game(appID, name, hours, cardsRemaining, badgeLevel));
            continue;
        }

        Task gameCheck = CheckGame(appID, name, hours, badgeLevel);
        if (Program.GlobalConfig.OptimizationMode == GlobalConfig.EOptimizationMode.MinMemoryUsage)
        {
            await gameCheck.ConfigureAwait(false);
        }
        else
        {
            pendingChecks.Add(gameCheck);
        }
    }

    // Wait for whatever was started in the background
    if (pendingChecks.Count > 0)
    {
        await Task.WhenAll(pendingChecks).ConfigureAwait(false);
    }
}
/// <summary>
/// Locates the opening tag that a closing tag called <paramref name="name"/> belongs to.
/// The collection is scanned from its end towards its start, and the first element whose name
/// matches (ignoring case), that has no child nodes and is not marked as terminated is chosen.
/// </summary>
/// <param name="nodes">The nodes read so far.</param>
/// <param name="name">The tag name to look for.</param>
/// <returns>The index of the matching element, or -1 when there is none.</returns>
private static int FindTagOpenNodeIndex(HtmlNodeCollection nodes, string name)
{
    int position = nodes.Count;

    while (--position >= 0)
    {
        HtmlElement candidate = nodes[position] as HtmlElement;
        if (candidate == null)
        {
            // Not an element (for example a text node)
            continue;
        }

        bool sameName = string.Equals(candidate.Name, name, StringComparison.OrdinalIgnoreCase);
        if (sameName && candidate.Nodes.Count == 0 && !candidate.IsTerminated)
        {
            return position;
        }
    }

    return -1;
}
/// <summary>
/// Collects the user reviews listed on a page's mobile review listing.
/// </summary>
/// <param name="pageIdOrAlias">
/// Either a page id consisting solely of digits, or a page alias that is resolved through <c>GetPageId</c>.
/// </param>
/// <returns>
/// A <see cref="PageReviewInfo"/> holding one entry per review node found; it has no entries when the
/// listing contains no review nodes.
/// </returns>
/// <exception cref="ArgumentException">No page id could be determined for <paramref name="pageIdOrAlias"/>.</exception>
/// <exception cref="ReviewPageNotFoundException">The fetched page contains one of the <c>ConstString.PageNotFound</c> texts.</exception>
public PageReviewInfo GetPageReviewInfo(string pageIdOrAlias)
{
    // Only a value made up entirely of digits is already an id. The anchors matter: an alias
    // such as "some.name.9" merely contains a digit run and still has to be resolved.
    string pageId = Regex.IsMatch(pageIdOrAlias, @"\A[0-9]+\z")
        ? pageIdOrAlias
        : GetPageId(pageIdOrAlias);

    if (string.IsNullOrEmpty(pageId))
    {
        throw new ArgumentException("Can not detect page id of " + pageIdOrAlias);
    }

    // Query with the resolved id; the endpoint's parameter is named "id".
    HtmlNode htmlNode = this.BuildDom("https://m.facebook.com/page/reviews.php?id=" + pageId);

    if (ConstString.PageNotFound.Any(text => htmlNode.InnerHtml.Contains(text)))
    {
        throw new ReviewPageNotFoundException(pageIdOrAlias + " not contains review page");
    }

    var pageReview = new PageReviewInfo();

    // Each matched node holds one user's review.
    // TODO : Replace with more safely xpath
    HtmlNodeCollection reviewNodes = htmlNode.SelectNodes("/html/body/div/div/div[2]/div[2]/div[1]/div/div[3]/div/div/div/div");
    if (reviewNodes == null)
    {
        // SelectNodes yields null when nothing matches: report "no reviews" instead of failing.
        return pageReview;
    }

    const string profilePrefix = "/profile.php?id=";

    foreach (var reviewNode in reviewNodes)
    {
        var reviewInfo = new PageReview();

        // Avatar
        HtmlNode imgAvatarNode = reviewNode.SelectSingleNode("div/div/div[1]/a/div/img");
        if (imgAvatarNode != null)
        {
            reviewInfo.UserAvatarUrl = WebUtility.HtmlDecode(imgAvatarNode.GetAttributeValue("src", string.Empty));
        }

        // User name and id
        HtmlNode userNameIdNode = reviewNode.SelectSingleNode("div/div/div[2]/div/div[1]/a[1]");
        if (userNameIdNode != null)
        {
            string urlLink = userNameIdNode.GetAttributeValue("href", null);
            if (urlLink != null)
            {
                int prefixIndex = urlLink.IndexOf(profilePrefix, StringComparison.Ordinal);
                if (prefixIndex >= 0)
                {
                    // /profile.php?id=100012141183155 -> everything after the prefix
                    reviewInfo.UserId = urlLink.Substring(prefixIndex + profilePrefix.Length);
                }
                else if (urlLink.Length > 0)
                {
                    // /kakarotto.pham.9 -> drop the leading slash
                    reviewInfo.UserId = urlLink.Substring(1);
                }
            }

            HtmlNode nameNode = userNameIdNode.SelectSingleNode("span");
            if (nameNode != null)
            {
                reviewInfo.UserDisplayName = WebUtility.HtmlDecode(nameNode.InnerText + string.Empty);
            }
        }

        // Rate score; -1 indicates the value could not be recognized
        HtmlNode rateScoreNode = reviewNode.SelectSingleNode("div/div/div[2]/div/div[1]/a[2]/img");
        if (rateScoreNode != null)
        {
            int score;
            string altText = rateScoreNode.GetAttributeValue("alt", "-1");
            reviewInfo.RateScore = int.TryParse(altText, NumberStyles.Integer, CultureInfo.InvariantCulture, out score)
                ? score
                : -1;
        }

        // The full review text lives on a separate page linked from the rating anchor
        HtmlNode rateContentNode = reviewNode.SelectSingleNode("div/div/div[2]/div/div[1]/a[2]");
        if (rateContentNode != null)
        {
            string rateContentAnchorLink = rateContentNode.GetAttributeValue("href", null);
            if (rateContentAnchorLink != null)
            {
                HtmlNode htmlRateContentNode = this.BuildDom("https://m.facebook.com" + rateContentAnchorLink);

                // TODO : Replace with more safely
                HtmlNode contentNode = htmlRateContentNode.SelectSingleNode("html/body/div/div/div[2]/div/div[1]/div/div[1]/div/div[1]/div[2]/p");
                if (contentNode != null)
                {
                    reviewInfo.Content = contentNode.InnerText;
                }
            }
        }

        pageReview.Reviews.Add(reviewInfo);
    }

    return pageReview;
}
/// <summary>
/// Turns a stream of markup tokens into a node collection.
/// </summary>
/// <remarks>
/// "&lt;" starts an element whose attributes are read up to "&gt;" or "/&gt;"; "&lt;/" closes the most
/// recent matching open element by moving the nodes that follow it underneath it; a stray "&gt;" is
/// ignored; every other token becomes a text node.
/// </remarks>
/// <param name="tokens">Tokens to consume; the queue is drained by this call.</param>
/// <returns>The nodes that were built.</returns>
private static HtmlNodeCollection BuildNodeCollection(Queue<string> tokens)
{
    HtmlNodeCollection collected = new HtmlNodeCollection(null);

    while (tokens.Count > 0)
    {
        string token = tokens.Dequeue();

        if (token == "<")
        {
            // Opening tag: the next token is the element name
            if (tokens.Count == 0)
            {
                continue;
            }

            token = tokens.Dequeue();
            HtmlElement opened = new HtmlElement(token);

            // Attributes follow until the tag is closed or the tokens run out
            while (tokens.Count > 0)
            {
                token = tokens.Dequeue();
                if (token == ">" || token == "/>")
                {
                    break;
                }

                string attributeName = token;
                if (tokens.Count > 0 && tokens.Peek() == "=")
                {
                    // name = value; the value is null when the tokens end right after "="
                    tokens.Dequeue();
                    token = (tokens.Count > 0) ? tokens.Dequeue() : null;
                    opened.Attributes.Add(new HtmlAttribute(attributeName, HttpUtility.HtmlDecode(token)));
                }
                else
                {
                    // Attribute without a value
                    opened.Attributes.Add(new HtmlAttribute(attributeName));
                }
            }

            collected.Add(opened);

            if (token == "/>")
            {
                // Self-closed: this element cannot take child nodes
                opened.IsTerminated = true;
            }
        }
        else if (token == "</")
        {
            // Closing tag: the next token is the name being closed
            if (tokens.Count == 0)
            {
                continue;
            }

            token = tokens.Dequeue();

            int openerIndex = FindTagOpenNodeIndex(collected, token);
            if (openerIndex != -1)
            {
                MoveNodesDown(ref collected, openerIndex + 1, (HtmlElement)collected[openerIndex]);
            }

            // Discard anything left before the end of the closing tag
            while (tokens.Count > 0 && tokens.Dequeue() != ">")
            {
            }
        }
        else if (token == ">")
        {
            // A stray terminator carries no content
        }
        else
        {
            collected.Add(new HtmlText(token));
        }
    }

    return collected;
}
/// <summary>
/// Writes one row per list entry that is not marked as given up to the
/// "教育_术语_scel详情信息.xlsx" workbook in the export directory. Each row carries the entry's category
/// fields, its name and the word count read from the entry's locally stored detail page.
/// </summary>
/// <param name="listSheet">Sheet holding the entries to export.</param>
/// <remarks>
/// Any failure while reading a detail page propagates to the caller unchanged, and the workbook is
/// not saved in that case.
/// </remarks>
private void GetList(IListSheet listSheet)
{
    string exportDir = this.RunPage.GetExportDir();

    // NOTE(review): pageSourceDir and localFilePath are never read. The calls are kept because
    // side effects of these RunPage members cannot be ruled out from here - drop them if pure.
    string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

    Dictionary<string, int> resultColumnDic = new Dictionary<string, int>
    {
        { "cate1", 0 },
        { "cateId1", 1 },
        { "cate2", 2 },
        { "cateId2", 3 },
        { "cate3", 4 },
        { "cateId3", 5 },
        { "name", 6 },
        { "wordCount", 7 }
    };

    string resultFilePath = Path.Combine(exportDir, "教育_术语_scel详情信息.xlsx");
    ExcelWriter resultEW = new ExcelWriter(resultFilePath, "List", resultColumnDic, null);

    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        string detailUrl = row["detailPageUrl"];

        bool giveUp = "Y".Equals(row[SysConfig.GiveUpGrabFieldName]);
        if (giveUp)
        {
            continue;
        }

        string localFilePath = this.RunPage.GetFilePath(detailUrl, pageSourceDir);

        // No try/catch here: the previous `catch (Exception ex) { throw ex; }` only reset the
        // stack trace. Letting exceptions propagate keeps the original failure location.
        HtmlAgilityPack.HtmlDocument htmlDoc = this.RunPage.GetLocalHtmlDocument(listSheet, i);

        // SelectNodes yields null when nothing matches, so a page without the list fails on the
        // next line with a NullReferenceException.
        HtmlNodeCollection itemNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"dict_info_list\"]/ul/li");
        HtmlNode itemNode = itemNodes[0];

        // The count is the text between the first ":" and the first "个" of the first list item.
        // Ordinal search: these are fixed separators, not linguistic text.
        string text = itemNode.InnerText.Trim();
        int splitBeginIndex = text.IndexOf(":", StringComparison.Ordinal);
        int splitEndIndex = text.IndexOf("个", StringComparison.Ordinal);
        int wordCount = int.Parse(text.Substring(splitBeginIndex + 1, splitEndIndex - splitBeginIndex - 1));

        Dictionary<string, string> f2vs = new Dictionary<string, string>
        {
            { "cate1", row["cate1"] },
            { "cateId1", row["cateId1"] },
            { "cate2", row["cate2"] },
            { "cateId2", row["cateId2"] },
            { "cate3", row["cate3"] },
            { "cateId3", row["cateId3"] },
            { "name", row["name"] },
            { "wordCount", wordCount.ToString() }
        };
        resultEW.AddRow(f2vs);
    }

    resultEW.SaveToDisk();
}