/// <summary>
/// Adds the page counts of the positive and the negative listing to
/// <c>SUM_OF_PAGES</c> and returns the updated value.
/// </summary>
/// <remarks>
/// <c>SUM_OF_PAGES</c> is incremented, not reset, so repeated calls keep accumulating.
/// A <see cref="NullReferenceException"/> is raised when an expected pagination node is
/// missing, and a <see cref="FormatException"/> when no page number can be extracted.
/// </remarks>
/// <returns>The value of <c>SUM_OF_PAGES</c> after both page counts were added.</returns>
public int getSumPages()
{
    // The user page is the same for both lookups, so fetch it only once.
    string user_source = getSource(getUserLink());
    string positive_link = getParameterLink(user_source, positive);
    string negative_link = getParameterLink(user_source, negative);

    string positive_source = getSource(positive_link);
    // Fix: this used to load positive_link a second time, so the negative listing
    // was never read and the positive page count was added twice.
    string negative_source = getSource(negative_link);

    SUM_OF_PAGES += readMaxPageNumber(positive_source);
    SUM_OF_PAGES += readMaxPageNumber(negative_source);

    return SUM_OF_PAGES;
}

/// <summary>
/// Extracts the page number from the <c>PGN_pagination1</c> element of a listing page.
/// </summary>
/// <param name="source">HTML of the listing page.</param>
/// <returns>
/// The digits of the link that directly precedes the closing <c>&lt;/b&gt;</c> tag,
/// which the caller treats as the number of pages.
/// </returns>
private static int readMaxPageNumber(string source)
{
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.LoadHtml(source);

    HtmlAgilityPack.HtmlNode pagination = htmlDoc.DocumentNode
        .SelectSingleNode("//body")
        .SelectSingleNode("//div[@id='CentralArea']")
        .SelectSingleNode("//div[@class='newPagination']");
    HtmlAgilityPack.HtmlNode pgn_pagination = pagination.SelectSingleNode("//b[@id='PGN_pagination1']");

    Regex max_page_regex = new Regex(@">(\d+)</a></b>");
    return Int32.Parse(max_page_regex.Match(pgn_pagination.WriteTo()).Groups[1].ToString());
}
/// <summary>
/// Builds a node from the given markup, sets the supplied attributes on it and
/// returns the resulting markup.
/// </summary>
/// <param name="html">Markup to parse; it is trimmed before the node is created.</param>
/// <param name="attributes">
/// Entries of the form <c>name=value</c>. Only the first <c>=</c> separates name and
/// value, so values may themselves contain <c>=</c> (for example query strings).
/// An entry without <c>=</c> uses the whole entry as both name and value.
/// </param>
/// <returns>
/// <paramref name="html"/> itself when no attributes are supplied; otherwise a new
/// <c>MvcHtmlString</c> containing the updated node.
/// </returns>
public static MvcHtmlString AddAttributes(this MvcHtmlString html, params string[] attributes)
{
    if (attributes.IsNullOrEmpty())
    {
        return html;
    }

    HtmlAgilityPack.HtmlNode mainNode = HtmlAgilityPack.HtmlNode.CreateNode(html.ToHtmlString().Trim());

    foreach (string attribute in attributes)
    {
        // Split into at most two parts: splitting on every '=' and taking the last
        // piece would cut off values such as "href=/page?a=1".
        string[] av = attribute.Split(new[] { '=' }, 2);
        mainNode.SetAttribute(av[0], av[av.Length - 1]);
    }

    return new MvcHtmlString(mainNode.WriteTo());
}
/// <summary>
/// Searches every page of the listing that belongs to <paramref name="param"/> and
/// appends all results to <c>dict[parameter]</c>.
/// </summary>
/// <param name="param">
/// Search parameter; only its <c>ToString()</c> value is used.
/// NOTE(review): the <c>object</c> type and the mutex suggest this runs as a thread
/// entry point - confirm against the caller.
/// </param>
/// <exception cref="Exception">Thrown when <c>CheckForInternetConnection()</c> reports no connection.</exception>
public void searchProductIn(object param)
{
    string parameter = param.ToString();
    if (!CheckForInternetConnection())
    {
        throw new Exception("No Internet Connection");
    }
    if (!initialized)
    {
        initialize();
    }

    Console.WriteLine("Searching " + parameter);
    string link = getParameterLink(getSource(getUserLink()), parameter);
    string param_source = getSource(link);

    // Copy the first page's results so that appending never mutates the list
    // instance handed out by searchPage.
    List<string> res = new List<string>(searchPage(param_source, parameter));
    HtmlAgilityPack.HtmlNode pn_pagination = findNextPageNode(param_source);

    Regex reg = new Regex(@"<a href=\x22([^\x22]+)\x22");

    // The element keeps containing a link for as long as there is a following page.
    while (pn_pagination.WriteTo().Contains("<a href="))
    {
        // Stripping "amp;" turns the HTML-escaped "&amp;" in the href back into "&".
        string next_page = reg.Match(pn_pagination.WriteTo()).Groups[1].ToString().Replace("amp;", "");

        // Download the page once and reuse it for both the search and the
        // pagination lookup; it used to be fetched twice per iteration.
        string next_source = getSource(next_page);
        res.AddRange(searchPage(next_source, parameter));
        pn_pagination = findNextPageNode(next_source);

        if (mutex.WaitOne(1000))
        {
            try
            {
                App.progressBar.Advance();
            }
            finally
            {
                // Release even if Advance() throws, otherwise the mutex stays held.
                mutex.ReleaseMutex();
            }
        }
    }

    dict[parameter] = dict[parameter].Concat(res).ToList();
}

/// <summary>
/// Locates the <c>PN_pagination1</c> element of a listing page.
/// </summary>
/// <param name="source">HTML of the listing page.</param>
/// <returns>The matching node, or <c>null</c> when the final lookup finds nothing.</returns>
private static HtmlAgilityPack.HtmlNode findNextPageNode(string source)
{
    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.LoadHtml(source);

    HtmlAgilityPack.HtmlNode pagination = htmlDoc.DocumentNode
        .SelectSingleNode("//body")
        .SelectSingleNode("//div[@id='CentralArea']")
        .SelectSingleNode("//div[@class='newPagination']");

    return pagination.SelectSingleNode("//b[@id='PN_pagination1']");
}