/// <summary>
/// Reads the match rows returned by <c>CrawlFirstPageData.GetMatchInfo</c> and queues one
/// <c>CrawlBettingMarkets</c> work item on the thread pool for each of them.
/// </summary>
/// <returns>
/// A fixed success message once every row has been queued (the queued work itself is not
/// awaited), or the exception message when loading or queuing fails.
/// </returns>
public string CrawlBettingLinks()
{
    TraceService("Crawl BettingLinks Started: ");
    try
    {
        DataTable matches = new CrawlFirstPageData().GetMatchInfo();
        WaitCallback worker = CrawlBettingMarkets;

        foreach (DataRow row in matches.Rows)
        {
            string bettingUrl = row["BettingLink"].ToString();
            int matchId = Convert.ToInt32(row["id"].ToString());

            ThreadParameters state = new ThreadParameters { URL = bettingUrl, MatchID = matchId };
            ThreadPool.QueueUserWorkItem(worker, state);
        }

        return "Command completed successfully";
    }
    catch (Exception ex)
    {
        ErrorLog("CrawlBettingMarket ---- Error:" + ex);
        return ex.Message;
    }
}
/// <summary>
/// Reads the match rows returned by <c>CrawlFirstPageData.GetMatchByCoupon</c> and queues one
/// <c>CrawlBettingMarkets</c> work item on the thread pool for each of them.
/// </summary>
/// <param name="archive">Forwarded unchanged to <c>GetMatchByCoupon</c>.</param>
/// <returns>
/// A fixed success message once every row has been queued (the queued work itself is not
/// awaited), or the exception message when loading or queuing fails.
/// </returns>
public string CrawlBettingLinksByCoupon(bool archive)
{
    TraceService("Crawl BettingLinks Started: ");
    try
    {
        DataTable matches = new CrawlFirstPageData().GetMatchByCoupon(archive);
        WaitCallback worker = CrawlBettingMarkets;

        foreach (DataRow row in matches.Rows)
        {
            string bettingUrl = row["BettingLink"].ToString();
            int matchId = Convert.ToInt32(row["id"].ToString());

            ThreadParameters state = new ThreadParameters { URL = bettingUrl, MatchID = matchId };
            ThreadPool.QueueUserWorkItem(worker, state);
        }

        return "Command completed successfully";
    }
    catch (Exception ex)
    {
        ErrorLog("CrawlBettingMarket ---- Error:" + ex);
        return ex.Message;
    }
}
/// <summary>
/// Queues a <c>CrawlBettingMarkets</c> thread-pool work item for every match row that
/// <c>CrawlFirstPageData.GetMatchByCoupon</c> returns.
/// </summary>
/// <param name="archive">Passed straight through to <c>GetMatchByCoupon</c>.</param>
/// <returns>
/// The success text after all rows were queued (without waiting for the work items), or the
/// message of the exception that interrupted loading/queuing.
/// </returns>
public string CrawlBettingLinksByCoupon(bool archive)
{
    TraceService("Crawl BettingLinks Started: ");

    try
    {
        CrawlFirstPageData source = new CrawlFirstPageData();
        DataRowCollection couponRows = source.GetMatchByCoupon(archive).Rows;

        foreach (DataRow couponRow in couponRows)
        {
            string link = couponRow["BettingLink"].ToString();
            int id = Convert.ToInt32(couponRow["id"].ToString());

            ThreadPool.QueueUserWorkItem(
                new WaitCallback(CrawlBettingMarkets),
                new ThreadParameters { URL = link, MatchID = id });
        }

        return "Command completed successfully";
    }
    catch (Exception failure)
    {
        ErrorLog("CrawlBettingMarket ---- Error:" + failure);
        return failure.Message;
    }
}
/// <summary>
/// Queues a <c>CrawlBettingMarkets</c> thread-pool work item for every match row that
/// <c>CrawlFirstPageData.GetMatchInfo</c> returns.
/// </summary>
/// <returns>
/// The success text after all rows were queued (without waiting for the work items), or the
/// message of the exception that interrupted loading/queuing.
/// </returns>
public string CrawlBettingLinks()
{
    TraceService("Crawl BettingLinks Started: ");

    try
    {
        CrawlFirstPageData source = new CrawlFirstPageData();
        DataRowCollection matchRows = source.GetMatchInfo().Rows;

        foreach (DataRow matchRow in matchRows)
        {
            string link = matchRow["BettingLink"].ToString();
            int id = Convert.ToInt32(matchRow["id"].ToString());

            ThreadPool.QueueUserWorkItem(
                new WaitCallback(CrawlBettingMarkets),
                new ThreadParameters { URL = link, MatchID = id });
        }

        return "Command completed successfully";
    }
    catch (Exception failure)
    {
        ErrorLog("CrawlBettingMarket ---- Error:" + failure);
        return failure.Message;
    }
}
/// <summary>
/// Keeps fetching batches of pending links from <c>FDOTService</c> while
/// <c>IsLinkCrawlRemain()</c> reports remaining work. For every link it crawls the outgoing
/// links, marks the link-crawl stage done, crawls the contents and marks the content stage done.
/// </summary>
/// <remarks>
/// Links whose extension is listed in <c>skipExtension</c> are not crawled, but both status
/// updates are still performed for them.
/// </remarks>
public void CrawlData()
{
    FDOTService service = new FDOTService();

    while (service.IsLinkCrawlRemain() > 0)
    {
        var linkList = service.GetUrlForLinkCrawl();
        if (linkList == null)
        {
            // NOTE(review): a null batch while work is still reported as remaining makes this
            // loop spin without pause; behaviour is left unchanged here — confirm whether a
            // back-off or an exit is wanted.
            continue;
        }

        foreach (var link in linkList)
        {
            // Path.GetExtension returns null only for a null path; treat that as "no extension".
            string ext = Path.GetExtension(link.FullUrl) ?? "";

            // Decide once per link; an empty extension is never skipped.
            bool skip = ext != "" && skipExtension.Contains(ext);

            if (skip)
            {
                // Skip this file format.
                Console.WriteLine("--------Skipped Extension " + ext);
            }
            else
            {
                CrawlAllLinks(link);
            }

            // Update url status as IsLinkCrawled = True (also for skipped links).
            service.UpdateLinkCrawled(link);

            if (skip)
            {
                // Skip this file format.
                Console.WriteLine("--------Skipped Extension " + ext);
            }
            else
            {
                CrawlContents(link);
            }

            // Update url status as IsDataCrawled = True (also for skipped links).
            service.UpdateHtmlContentCrawled(link);
        }
    }
}
/// <summary>
/// Thread-pool callback: downloads the betting page of one match, collects the distinct
/// betting markets listed on it and hands them to <c>CrawlAllMarketsData.InsertMarkets</c>.
/// </summary>
/// <param name="tp">
/// A <c>ThreadParameters</c> instance; its <c>URL</c> is the page to download and its
/// <c>MatchID</c> is stamped on every collected market.
/// </param>
/// <remarks>
/// Failures are written to the error log and never rethrown, because this method runs as a
/// <c>WaitCallback</c> and an exception escaping it would go unhandled on the pool thread.
/// </remarks>
public void CrawlBettingMarkets(object tp)
{
    ThreadParameters t = tp as ThreadParameters;
    if (t == null)
    {
        // Dereferencing a failed 'as' cast here would throw outside the try block below.
        ErrorLog("CrawlBettingMarket______ Error: state object is not a ThreadParameters instance");
        return;
    }

    string url = t.URL;
    int matchid = t.MatchID;

    try
    {
        TraceService("Crawling Betting Market, MatchID:" + matchid + " and URL: " + url + " ----");

        string htmlcontent = Helper.GetWebSiteContent(url);
        HtmlAgilityPack.HtmlDocument doc = Helper.LoadHtml(htmlcontent);
        List<BettingMarket> bettinglist = new List<BettingMarket>();

        var container = doc.DocumentNode.SelectSingleNode("//div[@id='mc']");
        if (container == null)
        {
            // Previously surfaced as an anonymous NullReferenceException; say what is missing.
            ErrorLog("CrawlBettingMarket______MatchID: " + matchid + " and URL:" + url + " Error: div with id 'mc' not found");
            return;
        }

        // NOTE(review): the leading "//" makes this XPath search the whole document rather
        // than only the container; left unchanged so the selected items stay the same.
        var rows = container.SelectNodes("//ul//li[@class='more-list-li']");
        if (rows != null)
        {
            foreach (var item in rows)
            {
                string li = item.InnerText;

                var anchor = item.SelectSingleNode("./a");
                var href = anchor == null ? null : anchor.Attributes["href"];
                if (href == null)
                {
                    // One malformed list item must not discard every other market of the match.
                    TraceService("Betting Market skipped (no link), MatchID:" + matchid + " Market:" + li);
                    continue;
                }

                string link = "http://www.oddschecker.com" + href.Value;

                // Keep only the first occurrence of each market name.
                if (!bettinglist.Exists(bettingmarket => bettingmarket.bettingmarket == li))
                {
                    bettinglist.Add(new BettingMarket() { matchid = matchid, bettingmarket = li, bettinglink = link });
                }
            }
        }

        // The insert is performed even when no market was found (empty list).
        XmlDocument xmldoc = GenerateXml(bettinglist);
        CrawlAllMarketsData crawldata = new CrawlAllMarketsData();
        crawldata.InsertMarkets(xmldoc);

        TraceService("Betting Market Data Inserted MatchID:" + matchid + " and URL:" + url + "------");
    }
    catch (Exception ex)
    {
        ErrorLog("CrawlBettingMarket______MatchID: " + matchid + " and URL:" + url + " Error:" + ex.ToString());
    }
}
/// <summary>
/// Click handler shared by the crawler menu items. The clicked item's <c>Text</c> selects
/// which crawl is run; the progress indicator and <c>IsCrawlProcessRunning</c> are switched
/// on before the crawl and off again afterwards.
/// </summary>
/// <param name="sender">
/// The clicked <c>ToolStripMenuItem</c>. For the per-sport items its <c>Name</c> is passed to
/// <c>GetSports</c>. Any other sender type is ignored.
/// </param>
/// <param name="e">Not used.</param>
private void crawlerToolStripMenuItem_Click(object sender, EventArgs e)
{
    ToolStripMenuItem menuItem = sender as ToolStripMenuItem;
    if (menuItem == null)
    {
        return;
    }

    if (IsCrawlProcessRunning == true)
    {
        SetProgress(false);
        IsCrawlProcessRunning = false;
    }

    SetProgress(true);
    IsCrawlProcessRunning = true;

    try
    {
        CrawlFirstPage crawldata = new CrawlFirstPage();

        switch (menuItem.Text)
        {
            case "League":
            {
                DataTable dt = crawldata.GetSports("2");
                string link = dt.Rows[0]["link"].ToString();
                int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());
                if (sportid == 2)
                {
                    crawldata.CrawlLeagues(link);
                }
                break;
            }

            case "WorldMarket":
            {
                DataTable dt = crawldata.GetSports("2");
                int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());
                if (sportid == 2)
                {
                    crawldata.CrawlWorldMarkets();
                }
                break;
            }

            case "Soccer":
            {
                // NOTE(review): the sport row is read but its values are not used below; the
                // lookup is kept so a missing row still fails exactly as before.
                DataTable dt = crawldata.GetSports(menuItem.Name);
                string link = dt.Rows[0]["link"].ToString();
                int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());

                DataSet ds = crawldata.GetLeague();
                foreach (DataRow league in ds.Tables[0].Rows)
                {
                    ThreadParameters t = new ThreadParameters();
                    t.URL = Convert.ToString(league["link"]);
                    t.SportID = Convert.ToInt32(league["sportid"]);
                    // LeagueID is a long, so convert as 64-bit instead of failing on ids > int.MaxValue.
                    t.LeagueID = Convert.ToInt64(league["leagueid"]);
                    crawldata.CrawlMyPage(t);
                }
                break;
            }

            case "GaaFootball":
            {
                DataTable dt = crawldata.GetSports(menuItem.Name);
                string link = dt.Rows[0]["link"].ToString();
                int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());

                ThreadParameters tp = new ThreadParameters();
                tp.URL = link;
                tp.SportID = sportid;

                // GAA Football
                if (sportid == 12)
                {
                    crawldata.CrawlMyPage(tp);
                }
                break;
            }

            case "GaaHurling":
            {
                DataTable dt = crawldata.GetSports(menuItem.Name);
                string link = dt.Rows[0]["link"].ToString();
                int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());

                ThreadParameters tp = new ThreadParameters();
                tp.URL = link;
                tp.SportID = sportid;

                // GAA Hurling
                if (sportid == 28)
                {
                    crawldata.CrawlMyPage(tp);
                }
                break;
            }

            case "Golf":
            {
                DataTable dt = crawldata.GetSports(menuItem.Name);
                string link = dt.Rows[0]["link"].ToString();
                int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());

                // Golf
                if (sportid == 29)
                {
                    crawldata.CrawlGolfTurnament(link, sportid.ToString());
                }
                break;
            }

            case "GolfMarketName":
                crawldata.CrawlGolfBettingMarketName();
                break;

            case "GolfMarkets":
                crawldata.CrawlGolfBettingMarket();
                break;

            case "BettingMarket":
            {
                CrawlAllMarkets crawl = new CrawlAllMarkets();
                crawl.CrawlBettingLinks();
                break;
            }

            case "MarketResult":
                crawldata.CrawlEachMatchResult();
                break;
        }
    }
    finally
    {
        // Reset even when a crawl throws, so the UI is never left marked as running.
        SetProgress(false);
        IsCrawlProcessRunning = false;
    }
}
//public string CrawlEachSport()
//{
//    try
//    {
//        CrawlFirstPageData crawldata = new CrawlFirstPageData();
//        DataTable dt = crawldata.GetSports();
//        for (int i = 0; i < dt.Rows.Count; i++)
//        {
//            string link = dt.Rows[i]["link"].ToString();
//            int sportid = Convert.ToInt32(dt.Rows[i]["sportid"].ToString());
//            if (sportid.Equals(2))
//            {
//                CrawlLeagues(link);
//                CrawlWorldMarkets();
//                DataSet ds = crawldata.GetLeagues();
//                for (int j = 0; j < ds.Tables[0].Rows.Count; j++)
//                {
//                    string matchlink = Convert.ToString(ds.Tables[0].Rows[j]["link"]);
//                    int sport_id = Convert.ToInt32(ds.Tables[0].Rows[j]["sportid"]);
//                    long leagueid = Convert.ToInt32(ds.Tables[0].Rows[j]["leagueid"]);
//                    CrawlMyPage(matchlink, sportid, leagueid);
//                }
//            }
//            else if (sportid.Equals(29))
//            {
//                CrawlGolf golf = new CrawlGolf();
//                golf.CrawlGolfTurnament(link, sportid.ToString());
//            }
//            else
//                CrawlMyPage(link, sportid, 0);
//        }
//        return "Command completed successfully";
//    }
//    catch (Exception ex)
//    {
//        return ex.Message;
//    }
//}
#endregion

/// <summary>
/// Walks every sport returned by <c>CrawlFirstPageData.GetSports</c> and starts the matching
/// crawl: sport id 2 is crawled league by league, sport id 29 goes to
/// <c>CrawlGolfTurnament</c>, every other sport goes directly to <c>CrawlMyPage</c>.
/// </summary>
/// <returns>
/// A fixed success message once all page crawls have been queued on the thread pool (they are
/// not awaited), or the exception message when something fails before that.
/// </returns>
public string CrawlEachSport()
{
    try
    {
        CrawlFirstPageData crawldata = new CrawlFirstPageData();
        DataTable dt = crawldata.GetSports();

        foreach (DataRow sport in dt.Rows)
        {
            string link = sport["link"].ToString();
            int sportid = Convert.ToInt32(sport["sportid"].ToString());

            if (sportid == 2)
            {
                ThreadParameters tp = new ThreadParameters();
                tp.URL = link;

                // Run to completion before GetLeagues() is read. Queuing this on the thread
                // pool let GetLeagues() execute before the league crawl had finished
                // (presumably CrawlLeagues produces the rows GetLeagues returns — the earlier
                // synchronous version of this method had the same ordering).
                CrawlLeagues(tp);
                CrawlWorldMarkets();

                DataSet ds = crawldata.GetLeagues();
                foreach (DataRow league in ds.Tables[0].Rows)
                {
                    ThreadParameters t = new ThreadParameters();
                    t.URL = Convert.ToString(league["link"]);
                    t.SportID = Convert.ToInt32(league["sportid"]);
                    // LeagueID is a long; convert as 64-bit so large ids do not overflow.
                    t.LeagueID = Convert.ToInt64(league["leagueid"]);
                    ThreadPool.QueueUserWorkItem(new WaitCallback(CrawlMyPage), t);
                }
            }
            else if (sportid == 29)
            {
                ThreadParameters t = new ThreadParameters();
                t.URL = link;
                t.SportID = sportid;
                t.LeagueID = 0;
                ThreadPool.QueueUserWorkItem(new WaitCallback(CrawlGolfTurnament), t);
            }
            else
            {
                ThreadParameters t = new ThreadParameters();
                t.URL = link;
                t.SportID = sportid;
                t.LeagueID = 0;
                ThreadPool.QueueUserWorkItem(new WaitCallback(CrawlMyPage), t);
            }
        }

        return "Command completed successfully";
    }
    catch (Exception ex)
    {
        return ex.Message;
    }
}
/// <summary>
/// Thread-pool callback: downloads the golf page at the given URL, walks its
/// <c>div.stats</c> cells and stores each tournament it can assemble through
/// <c>InsertGoldTurnament</c>.
/// </summary>
/// <param name="tp">
/// A <c>ThreadParameters</c> instance; <c>URL</c> is the page to download and <c>SportID</c>
/// is used in the trace messages.
/// </param>
/// <remarks>
/// Consecutive cells are matched by their exact <c>style</c> attribute value, in the order
/// duration, tournament, course, champion; a field whose cell is absent is stored as an empty
/// string. A tournament whose cells cannot be read is skipped and traced, not inserted.
/// </remarks>
public void CrawlGolfTurnament(object tp)
{
    ThreadParameters t = tp as ThreadParameters;
    if (t == null)
    {
        // Dereferencing a failed 'as' cast here would throw outside the try block below.
        TraceService("Error: CrawlGolfTurnament was called without a ThreadParameters state object\n");
        return;
    }

    string url = t.URL;
    int sportid = t.SportID;

    try
    {
        TraceService("Crawling Started: Golf:0 ,SportID:" + sportid + " , URL:" + url + "\n");

        string html = Helper.GetWebSiteContent(url);
        HtmlAgilityPack.HtmlDocument doc = Helper.LoadHtml(html);

        var container = doc.DocumentNode.SelectSingleNode("//div[@class='containerHeight']");

        // NOTE(review): the leading "//" makes this XPath search the whole document rather
        // than only the container; left unchanged so the selected cells stay the same.
        var rows = container.SelectNodes("//div[@class='stats']");
        if (rows != null)
        {
            int ii = 0;
            while (ii < rows.Count)
            {
                GolfTurnament golf = new GolfTurnament();
                int start = ii;

                try
                {
                    // The link comes from the <a> that wraps the first cell, when there is one.
                    var parent = rows[ii].ParentNode;
                    if (parent != null && parent.Name == "a")
                    {
                        golf.Link = "http://wikiform.com.au/oddschecker/" + parent.Attributes["href"].Value;
                    }
                    else
                    {
                        golf.Link = "";
                    }

                    // Each recognised cell consumes one position; an unrecognised cell leaves
                    // the cursor in place so the next check looks at the same cell.
                    if (rows[ii].Attributes["style"].Value == "clear:both;width:100px;")
                    {
                        golf.Duration = rows[ii].InnerText.Trim();
                        ii += 1;
                    }
                    else
                    {
                        golf.Duration = "";
                    }

                    if (rows[ii].Attributes["style"].Value == "width:317px;text-decoration:underline;"
                        || rows[ii].Attributes["style"].Value == "width:317px;")
                    {
                        golf.Turnament = rows[ii].InnerText.Trim();
                        ii += 1;
                    }
                    else
                    {
                        golf.Turnament = "";
                    }

                    if (rows[ii].Attributes["style"].Value == "width:242px;")
                    {
                        golf.Course = rows[ii].InnerText.Trim();
                        ii += 1;
                    }
                    else
                    {
                        golf.Course = "";
                    }

                    if (rows[ii].Attributes["style"].Value == "width:122px;border-right:0;")
                    {
                        golf.Champion = rows[ii].InnerText.Trim();
                        ii += 1;
                    }
                    else
                    {
                        golf.Champion = "";
                    }

                    InsertGoldTurnament(golf);
                }
                catch (Exception ex)
                {
                    // Still best-effort (the tournament is skipped), but no longer silent.
                    // NOTE(review): this also fires when the cursor runs past the last cell
                    // in the middle of a tournament, which drops that last tournament.
                    TraceService("Error: Golf cell " + start + " skipped ,SportID:" + sportid + " , URL:" + url + " , " + ex.Message + "\n");
                }

                // Guarantee forward progress when nothing was consumed this round.
                if (start == ii)
                {
                    ii += 1;
                }
            }

            TraceService("Data Inserted: Golf:0 ,SportID:" + sportid + " , URL:" + url + "\n");
        }
    }
    catch (Exception ex)
    {
        TraceService("Error:0 ,SportID:" + sportid + " , URL:" + url + "\n" + ex);
    }
}
/// <summary>
/// Thread-pool callback: downloads a fixtures page, reads the match rows of its tables and
/// stores every match through <c>CrawlFirstPageData.InsertMatchinfoDev</c>.
/// </summary>
/// <param name="tp">
/// A <c>ThreadParameters</c> instance; <c>URL</c> is the page to download, <c>SportID</c> is
/// passed to the insert and <c>LeagueID</c> is written to each match's <c>league</c> field.
/// </param>
/// <remarks>
/// Rows containing a <c>td.day</c> cell set the date used for the match rows that follow;
/// rows with more than four cells are treated as matches. Any failure aborts the page and is
/// traced, never rethrown.
/// </remarks>
public void CrawlMyPage(Object tp)
{
    ThreadParameters dev = tp as ThreadParameters;
    if (dev == null)
    {
        // Dereferencing a failed 'as' cast here would throw outside the try block below.
        TraceService("Error: CrawlMyPage was called without a ThreadParameters state object\n");
        return;
    }

    string url = dev.URL;
    int sportid = dev.SportID;
    long leagueid = dev.LeagueID;

    try
    {
        TraceService("Crawling Started: League ID:" + leagueid + " ,SportID:" + sportid + " , URL:" + url + "\n");

        string shtml = Helper.GetWebSiteContent(url);
        CrawlFirstPageData crawldata = new CrawlFirstPageData();

        // One instance is reused for all rows: the date set by a day row carries over to the
        // match rows below it.
        Matches match = new Matches();
        HtmlDocument doc = Helper.LoadHtml(shtml);
        List<Matches> matchlist = new List<Matches>();

        var row11 = doc.DocumentNode.SelectSingleNode("//div[@id='fixtures']");

        // NOTE(review): title is not used further; the lookup is kept because it makes the
        // page fail fast when the fixtures block or its heading is missing.
        string title = row11.SelectNodes(".//h2")[0].InnerText;

        // NOTE(review): the leading "//" makes this XPath search the whole document rather
        // than only the fixtures block; left unchanged so the selected rows stay the same.
        var rows = row11.SelectNodes("//table//tr");
        if (rows != null)
        {
            string enddatetime = String.Empty;

            // Row 0 is not processed.
            for (int ii = 1; ii < rows.Count; ii++)
            {
                var cols = rows[ii].SelectNodes("./td[@class='day']");
                if (cols != null)
                {
                    // Day row: the 2nd token loses its last two characters, the 3rd token is
                    // cut to three characters, the 4th is used as is; the result is parsed
                    // and normalised to yyyy-MM-dd.
                    string t = cols[0].InnerText.Trim();
                    match.date = t;
                    string[] matchdate = t.Split(' ');
                    enddatetime = matchdate[1].Substring(0, matchdate[1].Length - 2) + " " + matchdate[2].Substring(0, 3) + " " + matchdate[3];
                    enddatetime = DateTime.Parse(enddatetime).ToString("yyyy-MM-dd");
                }
                else
                {
                    var colnew = rows[ii].SelectNodes("./td");
                    if (colnew != null && colnew.Count > 4)
                    {
                        match.time = colnew[0].InnerText.Trim();

                        var home = colnew[1].SelectNodes(".//span[@class='fixtures-bet-name']");
                        var draw = colnew[2].SelectNodes(".//span[@class='fixtures-bet-name']");
                        var away = colnew[3].SelectNodes(".//span[@class='fixtures-bet-name']");
                        match.home = home[0].InnerText.Trim();
                        match.draw = draw[0].InnerText.Trim();
                        match.away = away[0].InnerText.Trim();
                        match.createddate = DateTime.Now;

                        var link = colnew[4].SelectNodes("./a");
                        string href = link[0].Attributes["href"].Value;
                        match.bettinglink = "http://www.oddschecker.com/" + href.Replace("/winner", "/betting-markets");
                        // The result link is the href as found on the page.
                        match.resultlink = "http://www.oddschecker.com/" + href;
                        match.Displayenddatetime = DateTime.Parse(enddatetime + " " + match.time);
                        match.league = leagueid.ToString();

                        matchlist.Add(new Matches()
                        {
                            date = match.date,
                            time = match.time,
                            home = match.home,
                            draw = match.draw,
                            away = match.away,
                            bettinglink = match.bettinglink,
                            Displayenddatetime = match.Displayenddatetime,
                            resultlink = match.resultlink
                        });
                        crawldata.InsertMatchinfoDev(match, sportid);
                    }
                }
            }

            // NOTE(review): the generated document is not consumed (matches are inserted one
            // by one above); the call is kept in case GenerateXml is relied on for validation.
            XmlDocument xmldoc = GenerateXml(matchlist);

            TraceService("Data Inserted: League ID:" + leagueid + " ,SportID:" + sportid + " , URL:" + url + "\n");
        }
    }
    catch (Exception ex)
    {
        TraceService("Error:" + leagueid + " ,SportID:" + sportid + " , URL:" + url + "\n" + ex);
    }
}