/// <summary>
/// Stores the dispatcher and builds the binder tree kept in <c>_urlBinder</c>.
/// </summary>
/// <remarks>
/// The tree registers a "urlList" collection over the rows of
/// <c>.MasterTable_TFF_Contents</c>. Each row gets a "source" binding (the
/// <c>href</c> attribute of the first cell's link) and a "custom" sub-model
/// holding the HTML bindings for the remaining cells.
/// </remarks>
/// <param name="dispatcher">Dispatcher stored in <c>_dispatcher</c>.</param>
public TffSourceScraper(WebScraperDispatcher dispatcher)
{
    _dispatcher = dispatcher;
    _urlBinder = new ModelPropertyBinder();

    // Collection over the table rows, then one unnamed model binder per row.
    var rowBinder = _urlBinder
        .AddChildBinder("urlList", typeof(CollectionPropertyBinderAction), ".MasterTable_TFF_Contents tbody tr")
        .AddChildBinder(null, typeof(ModelPropertyBinderAction), "");

    rowBinder.AddChildBinder("source", typeof(AttributePropertyBinderAction), "td:first a", "href");

    var detailsBinder = rowBinder.AddChildBinder("custom", typeof(ModelPropertyBinderAction), "");

    // Property name -> selector, registered in this exact order.
    string[][] cellBindings =
    {
        new[] { "HomeTeam", "td:nth-child(2) a" },
        new[] { "AwayTeam", "td:nth-child(4) a" },
        new[] { "Date", "td:nth-child(5)" },
        new[] { "Time", "td:nth-child(6)" },
        new[] { "Stadium", "td:nth-child(7)" },
        new[] { "Organization", "td:nth-child(8) span" },
    };

    foreach (string[] binding in cellBindings)
    {
        detailsBinder.AddChildBinder(binding[0], typeof(HtmlPropertyBinderAction), binding[1]);
    }
}
/// <summary>
/// Worker entry point: navigates to every document in the supplied data list,
/// binds the match page into a model and queues the outcome.
/// </summary>
/// <remarks>
/// For each document one tuple (source URL, status) is enqueued on "sourceQueue":
/// <list type="bullet">
/// <item><description>"1" - both scores are non-blank and both team stat models have exactly 7 properties.</description></item>
/// <item><description>"2" - the page was bound but one of those checks failed.</description></item>
/// <item><description>"3" - an exception occurred while scraping; no model is enqueued.</description></item>
/// </list>
/// The bound model is enqueued on "modelQueue" for statuses "1" and "2".
/// The browser obtained from the dispatcher is always released, even when the
/// loop terminates abnormally.
/// </remarks>
/// <param name="p">
/// A <c>Dictionary&lt;string, object&gt;</c> with the entries "sourceQueue",
/// "modelQueue", "data" and "dispatcher".
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="p"/> is not a <c>Dictionary&lt;string, object&gt;</c>.
/// </exception>
public void ThreadStart(object p)
{
    // Number of properties TransformMatchData produces when every section is present.
    const int ExpectedStatCount = 7;

    var parameters = p as Dictionary<string, object>;
    if (parameters == null)
    {
        throw new ArgumentException("Expected a Dictionary<string, object> of thread parameters.", "p");
    }

    var sourceQueue = (ConcurrentQueue<Tuple<string, string>>)parameters["sourceQueue"];
    var modelQueue = (ConcurrentQueue<object>)parameters["modelQueue"];
    var data = (List<BsonDocument>)parameters["data"];
    var dispatcher = (WebScraperDispatcher)parameters["dispatcher"];

    WebScraperBrowser browser = dispatcher.AddBrowser();
    try
    {
        ModelPropertyBinder matchInfo = BuildMatchInfoBinder();

        for (int i = 0; i < data.Count; i++)
        {
            object matchModel = null;
            Tuple<string, string> sourceModel = null;
            string source = null;

            try
            {
                // NOTE(review): scrapCount / totalToScrap / exceptionCount are declared outside
                // this method; if several workers share them these increments are not atomic.
                scrapCount++;
                Console.WriteLine(scrapCount + "/" + totalToScrap);

                // Read the URL once so the catch block never has to touch the document again.
                source = data[i]["source"].AsString;

                dispatcher.Navigate(browser.ID, source);
                matchInfo.SetDocument(browser.Document);
                matchInfo.Execute();

                object bound = matchInfo.Model;
                var model = bound as IContentBindingModel;

                IContentBindingModel awayStats = TransformMatchData(model["AwayTeamTempStats"] as IContentBindingCollection);
                model["AwayTeamStats"] = awayStats;
                IContentBindingModel homeStats = TransformMatchData(model["HomeTeamTempStats"] as IContentBindingCollection);
                model["HomeTeamStats"] = homeStats;
                model.RemoveProperty("AwayTeamTempStats");
                model.RemoveProperty("HomeTeamTempStats");

                var htScore = model["HomeTeamScore"];
                var atScore = model["AwayTeamScore"];

                bool incomplete =
                    htScore == null || string.IsNullOrWhiteSpace(htScore.ToString()) ||
                    atScore == null || string.IsNullOrWhiteSpace(atScore.ToString()) ||
                    awayStats.Properties.Count != ExpectedStatCount ||
                    homeStats.Properties.Count != ExpectedStatCount;

                sourceModel = new Tuple<string, string>(source, incomplete ? "2" : "1");

                // Only publish the model once the whole page has been processed.
                matchModel = bound;
            }
            catch (Exception ex)
            {
                exceptionCount++;
                matchModel = null;
                Console.Error.WriteLine("Failed to scrape " + (source ?? "<unknown source>") + ": " + ex.Message);

                // Without a readable source URL there is nothing to report back.
                sourceModel = source != null ? new Tuple<string, string>(source, "3") : null;
            }

            if (matchModel != null)
            {
                modelQueue.Enqueue(matchModel);
            }

            if (sourceModel != null)
            {
                sourceQueue.Enqueue(sourceModel);
            }
        }
    }
    finally
    {
        // Release the browser even if an unexpected exception ends the loop early.
        dispatcher.Dispose(browser);
    }
}

/// <summary>
/// Converts the raw per-section statistics of one team into a model keyed by
/// "Lineup", "Substitutes", "Coach", "Goals", "Cards", "SubstitutionOff" and
/// "SubstitutionOn". Sections with an unrecognised heading are ignored, and an
/// empty "Goals" collection is added when the page had no goals section.
/// </summary>
private static IContentBindingModel TransformMatchData(IContentBindingCollection collection)
{
    var factory = ModelPropertyBinderElementFactory.DefaultElementFactory;
    IContentBindingModel model = factory.CreateContentBindingModel();

    foreach (var item in collection)
    {
        var statModel = item as IContentBindingModel;
        var players = statModel["Players"] as IContentBindingCollection;
        var stat = statModel["StatName"].ToString().Trim();

        // Section headings are matched exactly as they appear on the page.
        switch (stat)
        {
            case "İlk 11":
                model["Lineup"] = BuildNumberedPlayers(players);
                break;
            case "Yedekler":
                model["Substitutes"] = BuildNumberedPlayers(players);
                break;
            case "Teknik Sorumlu":
                if (players.Count > 0)
                {
                    model["Coach"] = (players[0] as IContentBindingModel)["PlayerName"];
                }
                break;
            case "Goller":
                model["Goals"] = BuildGoals(players);
                break;
            case "Kartlar":
                model["Cards"] = BuildTimedEvents(players, true);
                break;
            case "Oyundan Çıkanlar":
                model["SubstitutionOff"] = BuildTimedEvents(players, false);
                break;
            case "Oyuna Girenler":
                model["SubstitutionOn"] = BuildTimedEvents(players, false);
                break;
            default:
                break;
        }
    }

    if (!model.Properties.Contains("Goals"))
    {
        model["Goals"] = factory.CreateContentBindingCollection();
    }

    return model;
}

/// <summary>Builds "Number"/"Name" entries; the number is the "Info" text with dots removed.</summary>
private static IContentBindingCollection BuildNumberedPlayers(IContentBindingCollection players)
{
    var factory = ModelPropertyBinderElementFactory.DefaultElementFactory;
    IContentBindingCollection result = factory.CreateContentBindingCollection();

    foreach (var entry in players)
    {
        var raw = entry as IContentBindingModel;
        var player = factory.CreateContentBindingModel();
        player["Number"] = raw["Info"].ToString().Trim().Replace(".", "");
        player["Name"] = raw["PlayerName"];
        result.Add(player);
    }

    return result;
}

/// <summary>
/// Builds "Name"/"Time" entries from goal text: parenthesised parts are removed,
/// then the text is split at its last comma into name and minute.
/// </summary>
private static IContentBindingCollection BuildGoals(IContentBindingCollection players)
{
    var factory = ModelPropertyBinderElementFactory.DefaultElementFactory;
    IContentBindingCollection result = factory.CreateContentBindingCollection();

    foreach (var entry in players)
    {
        var raw = entry as IContentBindingModel;
        var player = factory.CreateContentBindingModel();
        string text = Regex.Replace(raw["PlayerName"].ToString(), "\\(.+\\)", "");
        int comma = text.LastIndexOf(',');

        if (comma < 0)
        {
            // No minute part: keep the whole text as the name instead of letting
            // Substring(0, -1) throw and fail the entire match page.
            player["Name"] = text.Trim();
            player["Time"] = string.Empty;
        }
        else
        {
            player["Name"] = text.Substring(0, comma).Trim();
            player["Time"] = StripMinuteSuffix(text.Substring(comma + 1));
        }

        result.Add(player);
    }

    return result;
}

/// <summary>
/// Builds "Name"/"Time" entries from the "PlayerName" and "Info" values, plus a
/// "Type" taken from "Info2" when <paramref name="includeType"/> is true.
/// </summary>
private static IContentBindingCollection BuildTimedEvents(IContentBindingCollection players, bool includeType)
{
    var factory = ModelPropertyBinderElementFactory.DefaultElementFactory;
    IContentBindingCollection result = factory.CreateContentBindingCollection();

    foreach (var entry in players)
    {
        var raw = entry as IContentBindingModel;
        var player = factory.CreateContentBindingModel();
        string info = raw["Info"].ToString();
        string type = includeType ? raw["Info2"].ToString() : null;

        player["Name"] = raw["PlayerName"];
        player["Time"] = StripMinuteSuffix(info);
        if (includeType)
        {
            player["Type"] = type;
        }

        result.Add(player);
    }

    return result;
}

/// <summary>Removes every ".dk" occurrence from the text and trims the result.</summary>
private static string StripMinuteSuffix(string text)
{
    return Regex.Replace(text, "\\.dk", "").Trim();
}

/// <summary>Creates the binder tree used to read one match detail page.</summary>
private static ModelPropertyBinder BuildMatchInfoBinder()
{
    ModelPropertyBinder matchInfo = new ModelPropertyBinder();

    matchInfo.AddChildBinder("HomeTeam", typeof(HtmlPropertyBinderAction), "#ctl00_MPane_m_29_194_ctnr_m_29_194_MacBilgiDisplay1_dtMacBilgisi_lnkTakim1");
    matchInfo.AddChildBinder("AwayTeam", typeof(HtmlPropertyBinderAction), "#ctl00_MPane_m_29_194_ctnr_m_29_194_MacBilgiDisplay1_dtMacBilgisi_lnkTakim2");
    matchInfo.AddChildBinder("HomeTeamScore", typeof(HtmlPropertyBinderAction), "#ctl00_MPane_m_29_194_ctnr_m_29_194_MacBilgiDisplay1_dtMacBilgisi_lblTakim1Skor");
    matchInfo.AddChildBinder("AwayTeamScore", typeof(HtmlPropertyBinderAction), "#ctl00_MPane_m_29_194_ctnr_m_29_194_MacBilgiDisplay1_dtMacBilgisi_Label12");
    matchInfo.AddChildBinder("Organization", typeof(HtmlPropertyBinderAction), "#ctl00_MPane_m_29_194_ctnr_m_29_194_MacBilgiDisplay1_dtMacBilgisi_lblOrganizasyonAdi");
    matchInfo.AddChildBinder("MatchDateTime", typeof(HtmlPropertyBinderAction), "#ctl00_MPane_m_29_194_ctnr_m_29_194_MacBilgiDisplay1_dtMacBilgisi_lblTarih");

    // The two team tables differ only in the cell they are taken from.
    AddTeamStatsBinder(matchInfo, "HomeTeamTempStats", "#ctl00_MPane_m_29_194_ctnr_div > table > tbody > tr > td:nth-child(2) > table > tbody > tr:nth-child(2) > td:nth-child(1) > table");
    AddTeamStatsBinder(matchInfo, "AwayTeamTempStats", "#ctl00_MPane_m_29_194_ctnr_div > table > tbody > tr > td:nth-child(2) > table > tbody > tr:nth-child(2) > td:nth-child(3) > table");

    return matchInfo;
}

/// <summary>
/// Registers one team's statistics collection: each entry has a "StatName" and a
/// "Players" collection whose entries carry "PlayerName", "Info" and "Info2".
/// </summary>
private static void AddTeamStatsBinder(ModelPropertyBinder matchInfo, string propertyName, string tableSelector)
{
    var statistics = matchInfo
        .AddChildBinder(propertyName, typeof(CollectionPropertyBinderAction), tableSelector)
        .AddChildBinder("", typeof(ModelPropertyBinderAction), "");
    statistics.AddChildBinder("StatName", typeof(HtmlPropertyBinderAction), ".MacDetayMiniBaslik span");

    var player = statistics
        .AddChildBinder("Players", typeof(CollectionPropertyBinderAction), ".GriBorder tr td")
        .AddChildBinder("", typeof(ModelPropertyBinderAction), "");
    player.AddChildBinder("PlayerName", typeof(HtmlPropertyBinderAction), "a");
    player.AddChildBinder("Info", typeof(HtmlPropertyBinderAction), "span");
    player.AddChildBinder("Info2", typeof(AttributePropertyBinderAction), "img", "alt");
}