/// <summary>
/// Queues one thread-pool work item per row returned by
/// <c>CrawlFirstPageData.GetMatchInfo()</c>. Each work item runs
/// <c>CrawlBettingMarkets</c> with the row's "BettingLink" and "id" values.
/// </summary>
/// <returns>
/// "Command completed successfully" once every row has been queued (the queued
/// work is not waited for), or the exception message if fetching or queuing fails.
/// </returns>
public string CrawlBettingLinks()
 {
     TraceService("Crawl BettingLinks Started: ");
     try
     {
         DataTable matches = new CrawlFirstPageData().GetMatchInfo();

         // One delegate instance is enough; only the state object differs per row.
         WaitCallback worker = CrawlBettingMarkets;

         foreach (DataRow row in matches.Rows)
         {
             string link    = row["BettingLink"].ToString();
             int    matchId = Convert.ToInt32(row["id"].ToString());

             ThreadPool.QueueUserWorkItem(worker, new ThreadParameters { URL = link, MatchID = matchId });
         }

         return "Command completed successfully";
     }
     catch (Exception ex)
     {
         ErrorLog("CrawlBettingMarket ---- Error:" + ex.ToString());
         return ex.Message;
     }
 }
 /// <summary>
 /// Queues one thread-pool work item per row returned by
 /// <c>CrawlFirstPageData.GetMatchByCoupon(archive)</c>. Each work item runs
 /// <c>CrawlBettingMarkets</c> with the row's "BettingLink" and "id" values.
 /// </summary>
 /// <param name="archive">Passed through unchanged to <c>GetMatchByCoupon</c>.</param>
 /// <returns>
 /// "Command completed successfully" once every row has been queued (the queued
 /// work is not waited for), or the exception message if fetching or queuing fails.
 /// </returns>
 public string CrawlBettingLinksByCoupon(bool archive)
 {
     TraceService("Crawl BettingLinks Started: ");
     try
     {
         DataTable matches = new CrawlFirstPageData().GetMatchByCoupon(archive);

         // One delegate instance is enough; only the state object differs per row.
         WaitCallback worker = CrawlBettingMarkets;

         foreach (DataRow row in matches.Rows)
         {
             string link    = row["BettingLink"].ToString();
             int    matchId = Convert.ToInt32(row["id"].ToString());

             ThreadPool.QueueUserWorkItem(worker, new ThreadParameters { URL = link, MatchID = matchId });
         }

         return "Command completed successfully";
     }
     catch (Exception ex)
     {
         ErrorLog("CrawlBettingMarket ---- Error:" + ex.ToString());
         return ex.Message;
     }
 }
 /// <summary>
 /// Reads the matches selected by <c>CrawlFirstPageData.GetMatchByCoupon(archive)</c>
 /// and hands each one ("BettingLink" and "id" columns) to <c>CrawlBettingMarkets</c>
 /// on the thread pool.
 /// </summary>
 /// <param name="archive">Passed through unchanged to <c>GetMatchByCoupon</c>.</param>
 /// <returns>
 /// "Command completed successfully" after all rows are queued; the queued crawls
 /// are not waited for. On failure, the message of the caught exception.
 /// </returns>
 public string CrawlBettingLinksByCoupon(bool archive)
 {
     TraceService("Crawl BettingLinks Started: ");
     try
     {
         CrawlFirstPageData source = new CrawlFirstPageData();
         DataRowCollection pending = source.GetMatchByCoupon(archive).Rows;
         WaitCallback callback = CrawlBettingMarkets;

         foreach (DataRow record in pending)
         {
             string bettingLink = record["BettingLink"].ToString();
             int matchId = Convert.ToInt32(record["id"].ToString());

             ThreadParameters state = new ThreadParameters { URL = bettingLink, MatchID = matchId };
             ThreadPool.QueueUserWorkItem(callback, state);
         }

         return "Command completed successfully";
     }
     catch (Exception ex)
     {
         ErrorLog("CrawlBettingMarket ---- Error:" + ex.ToString());
         return ex.Message;
     }
 }
 /// <summary>
 /// Reads the matches returned by <c>CrawlFirstPageData.GetMatchInfo()</c> and hands
 /// each one ("BettingLink" and "id" columns) to <c>CrawlBettingMarkets</c> on the
 /// thread pool.
 /// </summary>
 /// <returns>
 /// "Command completed successfully" after all rows are queued; the queued crawls
 /// are not waited for. On failure, the message of the caught exception.
 /// </returns>
 public string CrawlBettingLinks()
 {
     TraceService("Crawl BettingLinks Started: ");
     try
     {
         CrawlFirstPageData source = new CrawlFirstPageData();
         DataRowCollection pending = source.GetMatchInfo().Rows;
         WaitCallback callback = CrawlBettingMarkets;

         foreach (DataRow record in pending)
         {
             string bettingLink = record["BettingLink"].ToString();
             int matchId = Convert.ToInt32(record["id"].ToString());

             ThreadParameters state = new ThreadParameters { URL = bettingLink, MatchID = matchId };
             ThreadPool.QueueUserWorkItem(callback, state);
         }

         return "Command completed successfully";
     }
     catch (Exception ex)
     {
         ErrorLog("CrawlBettingMarket ---- Error:" + ex.ToString());
         return ex.Message;
     }
 }
示例#5
0
        /// <summary>
        /// Processes pending links for as long as <c>FDOTService.IsLinkCrawlRemain()</c>
        /// reports a positive count. For every link in a fetched batch it runs the
        /// link-crawl stage followed by the content-crawl stage, calling
        /// <c>UpdateLinkCrawled</c> and <c>UpdateHtmlContentCrawled</c> after the
        /// respective stage. Links whose extension is listed in <c>skipExtension</c>
        /// are not crawled but still get both status updates.
        /// </summary>
        public void CrawlData()
        {
            FDOTService service = new FDOTService();

            while (service.IsLinkCrawlRemain() > 0)
            {
                var linkList = service.GetUrlForLinkCrawl();
                if (linkList == null)
                {
                    // NOTE(review): if the service keeps returning null while work remains,
                    // this loop spins without progress - confirm that cannot happen.
                    continue;
                }

                foreach (var link in linkList)
                {
                    // Path.GetExtension returns null for a null path; treat that as "no extension".
                    string ext = Path.GetExtension(link.FullUrl) ?? "";

                    // Evaluated once and reused by both stages; an empty extension is never skipped.
                    bool skip = ext != "" && skipExtension.Contains(ext);

                    // Stage 1: discover outgoing links.
                    if (skip)
                    {
                        // Skip this file format.
                        Console.WriteLine("--------Skipped Extension " + ext);
                    }
                    else
                    {
                        CrawlAllLinks(link);
                    }

                    // Record the link-crawl stage as done for this URL.
                    service.UpdateLinkCrawled(link);

                    // Stage 2: crawl the page contents.
                    if (skip)
                    {
                        // Skip this file format.
                        Console.WriteLine("--------Skipped Extension " + ext);
                    }
                    else
                    {
                        CrawlContents(link);
                    }

                    // Record the content-crawl stage as done for this URL.
                    service.UpdateHtmlContentCrawled(link);
                }
            }
        }
        /// <summary>
        /// Thread-pool work item: downloads a match's betting page, collects the distinct
        /// market names and links from its "more-list-li" list items, converts them with
        /// <c>GenerateXml</c> and stores them through <c>CrawlAllMarketsData.InsertMarkets</c>.
        /// Failures are written to the error log and never thrown to the caller.
        /// </summary>
        /// <param name="tp">
        /// A <c>ThreadParameters</c> instance whose <c>URL</c> and <c>MatchID</c> identify the
        /// page to crawl. Any other value is logged and ignored.
        /// </param>
        public void CrawlBettingMarkets(object tp)
        {
            ThreadParameters t = tp as ThreadParameters;
            if (t == null)
            {
                // Dereferencing a null state here would throw outside the try block, and an
                // unhandled exception on a thread-pool thread takes the process down.
                ErrorLog("CrawlBettingMarket______Error: state object is not a ThreadParameters instance");
                return;
            }

            string url     = t.URL;
            int    matchid = t.MatchID;

            try
            {
                TraceService("Crawling Betting Market, MatchID:" + matchid + " and URL: " + url + " ----");
                string htmlcontent = Helper.GetWebSiteContent(url);
                HtmlAgilityPack.HtmlDocument doc = Helper.LoadHtml(htmlcontent);

                var container = doc.DocumentNode.SelectSingleNode("//div[@id='mc']");
                if (container == null)
                {
                    // Previously surfaced as an opaque NullReferenceException; nothing is inserted either way.
                    ErrorLog("CrawlBettingMarket______MatchID: " + matchid + " and URL:" + url + " Error: markets container div 'mc' not found");
                    return;
                }

                // NOTE(review): the leading "//" makes this XPath search the whole document rather
                // than only below the container; ".//ul//li[...]" may have been intended - confirm.
                var rows = container.SelectNodes("//ul//li[@class='more-list-li']");

                List<BettingMarket> bettinglist = new List<BettingMarket>();
                if (rows != null)
                {
                    // Market names already collected; replaces a linear List.Exists scan per row.
                    HashSet<string> seenMarkets = new HashSet<string>();

                    foreach (var row in rows)
                    {
                        string li     = row.InnerText;
                        var    anchor = row.SelectSingleNode("./a");
                        var    href   = anchor == null ? null : anchor.Attributes["href"];
                        if (href == null)
                        {
                            // A list item without a link cannot be crawled; skip it instead of
                            // aborting every market of this match.
                            continue;
                        }

                        if (seenMarkets.Add(li))
                        {
                            bettinglist.Add(new BettingMarket()
                            {
                                matchid       = matchid,
                                bettingmarket = li,
                                bettinglink   = "http://www.oddschecker.com" + href.Value
                            });
                        }
                    }
                }

                XmlDocument xmldoc = GenerateXml(bettinglist);
                new CrawlAllMarketsData().InsertMarkets(xmldoc);
                TraceService("Betting Market Data Inserted MatchID:" + matchid + " and URL:" + url + "------");
            }
            catch (Exception ex)
            {
                ErrorLog("CrawlBettingMarket______MatchID: " + matchid + " and URL:" + url + " Error:" + ex.ToString());
            }
        }
        /// <summary>
        /// Click handler shared by the crawler menu items. Runs the crawl that matches the
        /// clicked item's <c>Text</c>, with the progress indicator and
        /// <c>IsCrawlProcessRunning</c> switched on for the duration.
        /// </summary>
        /// <param name="sender">The clicked <c>ToolStripMenuItem</c>; other senders are ignored.</param>
        /// <param name="e">Unused event data.</param>
        private void crawlerToolStripMenuItem_Click(object sender, EventArgs e)
        {
            // Cast once instead of repeating an unchecked "sender as ToolStripMenuItem" per branch.
            ToolStripMenuItem menuItem = sender as ToolStripMenuItem;
            if (menuItem == null)
            {
                return;
            }

            if (IsCrawlProcessRunning == true)
            {
                SetProgress(false);
                IsCrawlProcessRunning = false;
            }
            SetProgress(true);
            IsCrawlProcessRunning = true;

            try
            {
                CrawlFirstPage crawldata = new CrawlFirstPage();

                switch (menuItem.Text)
                {
                    case "League":
                    {
                        DataTable dt = crawldata.GetSports("2");
                        string link = dt.Rows[0]["link"].ToString();
                        int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());
                        if (sportid == 2)
                        {
                            crawldata.CrawlLeagues(link);
                        }
                        break;
                    }
                    case "WorldMarket":
                    {
                        DataTable dt = crawldata.GetSports("2");
                        int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());
                        if (sportid == 2)
                        {
                            crawldata.CrawlWorldMarkets();
                        }
                        break;
                    }
                    case "Soccer":
                    {
                        // NOTE(review): the sport lookup result is not used by this branch; the call
                        // is kept in case GetSports has side effects - confirm and remove if it is a pure query.
                        crawldata.GetSports(menuItem.Name);

                        DataSet ds = crawldata.GetLeague();
                        foreach (DataRow league in ds.Tables[0].Rows)
                        {
                            ThreadParameters t = new ThreadParameters();
                            t.URL = Convert.ToString(league["link"]);
                            t.SportID = Convert.ToInt32(league["sportid"]);
                            // LeagueID is assigned from a long, so convert without narrowing to int first.
                            t.LeagueID = Convert.ToInt64(league["leagueid"]);
                            crawldata.CrawlMyPage(t);
                        }
                        break;
                    }
                    case "GaaFootball":
                    {
                        DataTable dt = crawldata.GetSports(menuItem.Name);
                        string link = dt.Rows[0]["link"].ToString();
                        int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());
                        // GAA Football
                        if (sportid == 12)
                        {
                            ThreadParameters tp = new ThreadParameters();
                            tp.URL = link;
                            tp.SportID = sportid;
                            crawldata.CrawlMyPage(tp);
                        }
                        break;
                    }
                    case "GaaHurling":
                    {
                        DataTable dt = crawldata.GetSports(menuItem.Name);
                        string link = dt.Rows[0]["link"].ToString();
                        int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());
                        // GAA Hurling
                        if (sportid == 28)
                        {
                            ThreadParameters tp = new ThreadParameters();
                            tp.URL = link;
                            tp.SportID = sportid;
                            crawldata.CrawlMyPage(tp);
                        }
                        break;
                    }
                    case "Golf":
                    {
                        DataTable dt = crawldata.GetSports(menuItem.Name);
                        string link = dt.Rows[0]["link"].ToString();
                        int sportid = Convert.ToInt32(dt.Rows[0]["sportid"].ToString());
                        // Golf
                        if (sportid == 29)
                        {
                            crawldata.CrawlGolfTurnament(link, sportid.ToString());
                        }
                        break;
                    }
                    case "GolfMarketName":
                        crawldata.CrawlGolfBettingMarketName();
                        break;
                    case "GolfMarkets":
                        crawldata.CrawlGolfBettingMarket();
                        break;
                    case "BettingMarket":
                    {
                        CrawlAllMarkets crawl = new CrawlAllMarkets();
                        crawl.CrawlBettingLinks();
                        break;
                    }
                    case "MarketResult":
                        crawldata.CrawlEachMatchResult();
                        break;
                }
            }
            finally
            {
                // Reset even when a crawl throws, so the UI is not left showing a running crawl.
                SetProgress(false);
                IsCrawlProcessRunning = false;
            }
        }
        //public string CrawlEachSport()
        //{
        //    try
        //    {
        //        CrawlFirstPageData crawldata = new CrawlFirstPageData();
        //        DataTable dt = crawldata.GetSports();
        //        for (int i = 0; i < dt.Rows.Count; i++)
        //        {
        //            string link = dt.Rows[i]["link"].ToString();
        //            int sportid = Convert.ToInt32(dt.Rows[i]["sportid"].ToString());
        //            if (sportid.Equals(2))
        //            {
        //                CrawlLeagues(link);
        //                CrawlWorldMarkets();
        //                DataSet ds = crawldata.GetLeagues();
        //                for (int j = 0; j < ds.Tables[0].Rows.Count; j++)
        //                {
        //                    string matchlink = Convert.ToString(ds.Tables[0].Rows[j]["link"]);
        //                    int sport_id = Convert.ToInt32(ds.Tables[0].Rows[j]["sportid"]);
        //                    long leagueid = Convert.ToInt32(ds.Tables[0].Rows[j]["leagueid"]);
        //                    CrawlMyPage(matchlink, sportid, leagueid);
        //                }
        //            }
        //            else if (sportid.Equals(29))
        //            {
        //                CrawlGolf golf = new CrawlGolf();
        //                golf.CrawlGolfTurnament(link, sportid.ToString());
        //            }
        //            else
        //                CrawlMyPage(link, sportid, 0);
        //        }
        //        return "Command completed successfully";
        //    }
        //    catch (Exception ex)
        //    {
        //        return ex.Message;
        //    }
        //}
        #endregion
        /// <summary>
        /// Walks every sport returned by <c>CrawlFirstPageData.GetSports()</c> and starts the
        /// matching crawl:
        /// sport id 2 queues <c>CrawlLeagues</c>, runs <c>CrawlWorldMarkets</c> and queues
        /// <c>CrawlMyPage</c> for every league; sport id 29 queues <c>CrawlGolfTurnament</c>;
        /// any other sport queues <c>CrawlMyPage</c> with league id 0.
        /// </summary>
        /// <returns>
        /// "Command completed successfully" once all work has been queued (queued work is not
        /// waited for), or the message of the exception that interrupted the walk.
        /// </returns>
        public string CrawlEachSport()
        {
            try
            {
                CrawlFirstPageData crawldata = new CrawlFirstPageData();
                DataTable sports = crawldata.GetSports();

                foreach (DataRow sport in sports.Rows)
                {
                    string link = sport["link"].ToString();
                    int sportid = Convert.ToInt32(sport["sportid"].ToString());

                    if (sportid == 2)
                    {
                        ThreadParameters leaguesState = new ThreadParameters();
                        leaguesState.URL = link;
                        ThreadPool.QueueUserWorkItem(new WaitCallback(CrawlLeagues), leaguesState);

                        CrawlWorldMarkets();

                        // NOTE(review): CrawlLeagues was only queued above and may not have run yet
                        // when the leagues are read here - confirm this ordering is acceptable.
                        DataSet ds = crawldata.GetLeagues();
                        foreach (DataRow league in ds.Tables[0].Rows)
                        {
                            ThreadParameters leagueState = new ThreadParameters();
                            leagueState.URL = Convert.ToString(league["link"]);
                            leagueState.SportID = Convert.ToInt32(league["sportid"]);
                            // The league id is handled as a long everywhere else in this method,
                            // so do not narrow it through Convert.ToInt32.
                            leagueState.LeagueID = Convert.ToInt64(league["leagueid"]);
                            ThreadPool.QueueUserWorkItem(new WaitCallback(CrawlMyPage), leagueState);
                        }
                    }
                    else if (sportid == 29)
                    {
                        ThreadParameters golfState = new ThreadParameters();
                        golfState.URL = link;
                        golfState.SportID = sportid;
                        golfState.LeagueID = 0;
                        ThreadPool.QueueUserWorkItem(new WaitCallback(CrawlGolfTurnament), golfState);
                    }
                    else
                    {
                        ThreadParameters sportState = new ThreadParameters();
                        sportState.URL = link;
                        sportState.SportID = sportid;
                        sportState.LeagueID = 0;
                        ThreadPool.QueueUserWorkItem(new WaitCallback(CrawlMyPage), sportState);
                    }
                }

                return "Command completed successfully";
            }
            catch (Exception ex)
            {
                return ex.Message;
            }
        }
        /// <summary>
        /// Thread-pool work item: downloads the golf tournament page and walks its
        /// <c>div class="stats"</c> cells. Consecutive cells are recognised by their inline
        /// <c>style</c> value as duration, tournament, course and champion; each assembled
        /// <c>GolfTurnament</c> is stored with <c>InsertGoldTurnament</c>.
        /// Failures are traced and never thrown to the caller.
        /// </summary>
        /// <param name="tp">
        /// A <c>ThreadParameters</c> instance providing <c>URL</c> and <c>SportID</c>.
        /// Any other value is traced and ignored.
        /// </param>
        public void CrawlGolfTurnament(object tp)
        {
            ThreadParameters t = tp as ThreadParameters;
            if (t == null)
            {
                // Dereferencing a null state would throw outside the try block on a pool thread.
                TraceService("Error: CrawlGolfTurnament state object is not a ThreadParameters instance\n");
                return;
            }

            string url = t.URL;
            int sportid = t.SportID;

            try
            {
                TraceService("Crawling Started: Golf:0 ,SportID:" + sportid + " , URL:" + url + "\n");

                string html = Helper.GetWebSiteContent(url);
                HtmlAgilityPack.HtmlDocument doc = Helper.LoadHtml(html);

                var container = doc.DocumentNode.SelectSingleNode("//div[@class='containerHeight']");
                if (container == null)
                {
                    TraceService("Error:0 ,SportID:" + sportid + " , URL:" + url + " , container div 'containerHeight' not found\n");
                    return;
                }

                // NOTE(review): the leading "//" searches the whole document, not just the container - confirm intent.
                var rows = container.SelectNodes("//div[@class='stats']");
                if (rows != null)
                {
                    int ii = 0;
                    while (ii < rows.Count)
                    {
                        GolfTurnament golf = new GolfTurnament();
                        int start = ii;
                        try
                        {
                            // The tournament link is taken from the cell's parent when that parent is an anchor.
                            var parent = rows[ii].ParentNode;
                            if (parent != null && parent.Name == "a")
                            {
                                golf.Link = "http://wikiform.com.au/oddschecker/" + parent.Attributes["href"].Value;
                            }
                            else
                            {
                                golf.Link = "";
                            }

                            // Each recognised cell is consumed (ii advances); a missing column leaves its field empty.
                            if (rows[ii].Attributes["style"].Value == "clear:both;width:100px;")
                            {
                                golf.Duration = rows[ii].InnerText.Trim();
                                ii += 1;
                            }
                            else
                            {
                                golf.Duration = "";
                            }

                            if (rows[ii].Attributes["style"].Value == "width:317px;text-decoration:underline;" || rows[ii].Attributes["style"].Value == "width:317px;")
                            {
                                golf.Turnament = rows[ii].InnerText.Trim();
                                ii += 1;
                            }
                            else
                            {
                                golf.Turnament = "";
                            }

                            if (rows[ii].Attributes["style"].Value == "width:242px;")
                            {
                                golf.Course = rows[ii].InnerText.Trim();
                                ii += 1;
                            }
                            else
                            {
                                golf.Course = "";
                            }

                            if (rows[ii].Attributes["style"].Value == "width:122px;border-right:0;")
                            {
                                golf.Champion = rows[ii].InnerText.Trim();
                                ii += 1;
                            }
                            else
                            {
                                golf.Champion = "";
                            }

                            InsertGoldTurnament(golf);
                        }
                        catch (Exception ex)
                        {
                            // A malformed cell (no style attribute, running past the last cell, ...) drops
                            // only this record; record the reason instead of swallowing it silently.
                            TraceService("Error: Golf row " + start + " ,SportID:" + sportid + " , URL:" + url + " , " + ex.Message + "\n");
                        }

                        // Guarantee progress when no cell was consumed in this pass.
                        if (start == ii)
                        {
                            ii += 1;
                        }
                    }

                    TraceService("Data Inserted: Golf:0  ,SportID:" + sportid + " , URL:" + url + "\n");
                }
            }
            catch (Exception ex)
            {
                TraceService("Error:0 ,SportID:" + sportid + " , URL:" + url + " , " + ex.Message + "\n");
            }
        }
        /// <summary>
        /// Thread-pool work item: downloads a fixtures page, walks its table rows and stores
        /// every match row through <c>CrawlFirstPageData.InsertMatchinfoDev</c>. Rows with a
        /// <c>td class="day"</c> cell set the date that applies to the match rows after them.
        /// Failures are traced and never thrown to the caller.
        /// </summary>
        /// <param name="tp">
        /// A <c>ThreadParameters</c> instance providing <c>URL</c>, <c>SportID</c> and
        /// <c>LeagueID</c>. Any other value is traced and ignored.
        /// </param>
        public void CrawlMyPage(Object tp)
        {
            ThreadParameters dev = tp as ThreadParameters;
            if (dev == null)
            {
                // Dereferencing a null state would throw outside the try block on a pool thread.
                TraceService("Error: CrawlMyPage state object is not a ThreadParameters instance\n");
                return;
            }

            string url = dev.URL;
            int sportid = dev.SportID;
            long leagueid = dev.LeagueID;
            try
            {
                TraceService("Crawling Started: League ID:" + leagueid + " ,SportID:" + sportid + " , URL:" + url + "\n");

                // Scraped dates are machine text, so parse and format them independently of the
                // culture the crawler happens to run under.
                System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

                string shtml = Helper.GetWebSiteContent(url);
                CrawlFirstPageData crawldata = new CrawlFirstPageData();
                Matches match = new Matches();
                HtmlDocument doc = Helper.LoadHtml(shtml);
                List<Matches> matchlist = new List<Matches>();

                var fixtures = doc.DocumentNode.SelectSingleNode("//div[@id='fixtures']");
                if (fixtures == null)
                {
                    TraceService("Error:" + leagueid + " ,SportID:" + sportid + " , URL:" + url + " , fixtures div not found\n");
                    return;
                }

                // NOTE(review): the leading "//" searches the whole document, not just the fixtures div - confirm intent.
                var rows = fixtures.SelectNodes("//table//tr");
                if (rows != null)
                {
                    string enddatetime = String.Empty;

                    // Starts at 1: the first row is not processed.
                    for (int ii = 1; ii < rows.Count; ii++)
                    {
                        var dayCells = rows[ii].SelectNodes("./td[@class='day']");
                        if (dayCells != null)
                        {
                            // Day header row: the 2nd token is the day with a two-letter suffix, the
                            // 3rd the month name (first three letters used) and the 4th the year.
                            string t = dayCells[0].InnerText.Trim();
                            match.date = t;
                            string[] matchdate = t.Split(' ');
                            enddatetime = matchdate[1].Substring(0, matchdate[1].Length - 2) + " " + matchdate[2].Substring(0, 3) + " " + matchdate[3];
                            enddatetime = DateTime.Parse(enddatetime, invariant).ToString("yyyy-MM-dd", invariant);
                            continue;
                        }

                        var cells = rows[ii].SelectNodes("./td");
                        if (cells == null || cells.Count <= 4)
                        {
                            continue;
                        }

                        match.time = cells[0].InnerText.Trim();
                        match.home = cells[1].SelectNodes(".//span[@class='fixtures-bet-name']")[0].InnerText.Trim();
                        match.draw = cells[2].SelectNodes(".//span[@class='fixtures-bet-name']")[0].InnerText.Trim();
                        match.away = cells[3].SelectNodes(".//span[@class='fixtures-bet-name']")[0].InnerText.Trim();
                        match.createddate = DateTime.Now;

                        string href = cells[4].SelectNodes("./a")[0].Attributes["href"].Value;
                        match.bettinglink = "http://www.oddschecker.com/" + href.Replace("/winner", "/betting-markets");
                        // The result link is the href as-is (the former Replace("/winner", "/winner") was a no-op).
                        match.resultlink = "http://www.oddschecker.com/" + href;
                        match.Displayenddatetime = DateTime.Parse(enddatetime + " " + match.time, invariant);
                        match.league = leagueid.ToString();

                        matchlist.Add(new Matches() { date = match.date, time = match.time, home = match.home, draw = match.draw, away = match.away, bettinglink = match.bettinglink, Displayenddatetime = match.Displayenddatetime, resultlink = match.resultlink });
                        crawldata.InsertMatchinfoDev(match, sportid);
                    }

                    // NOTE(review): the generated XML is not used (the bulk insert that consumed it is
                    // disabled); the call is kept in case GenerateXml has side effects - confirm and remove.
                    GenerateXml(matchlist);
                    TraceService("Data Inserted: League ID:" + leagueid + " ,SportID:" + sportid + " , URL:" + url + "\n");
                }
            }
            catch (Exception ex)
            {
                TraceService("Error:" + leagueid + " ,SportID:" + sportid + " , URL:" + url + " , " + ex.Message + "\n");
            }
        }