コード例 #1
0
        /// <summary>
        /// Stores the dispatcher and builds the binder tree that extracts one
        /// entry per row of the <c>.MasterTable_TFF_Contents</c> table.
        /// </summary>
        /// <remarks>
        /// Each row is bound as a model with a <c>source</c> property (the
        /// <c>href</c> of the link in the first cell) and a nested <c>custom</c>
        /// model holding the text-bearing cells of the row.
        /// </remarks>
        /// <param name="dispatcher">Dispatcher kept in <c>_dispatcher</c> for later use.</param>
        public TffSourceScraper(WebScraperDispatcher dispatcher)
        {
            _dispatcher = dispatcher;
            _urlBinder = new ModelPropertyBinder();

            // "urlList" is a collection over the table rows; every row is bound as an unnamed model.
            var rowBinder = _urlBinder
                .AddChildBinder("urlList", typeof(CollectionPropertyBinderAction), ".MasterTable_TFF_Contents tbody tr")
                .AddChildBinder(null, typeof(ModelPropertyBinderAction), "");

            // Link target taken from the anchor in the first cell.
            rowBinder.AddChildBinder("source", typeof(AttributePropertyBinderAction), "td:first a", "href");

            // Remaining cells are grouped under a nested "custom" model.
            var detailsBinder = rowBinder.AddChildBinder("custom", typeof(ModelPropertyBinderAction), "");

            // Property name -> selector (relative to the row), registered in this order.
            string[][] detailColumns =
            {
                new[] { "HomeTeam", "td:nth-child(2) a" },
                new[] { "AwayTeam", "td:nth-child(4) a" },
                new[] { "Date", "td:nth-child(5)" },
                new[] { "Time", "td:nth-child(6)" },
                new[] { "Stadium", "td:nth-child(7)" },
                new[] { "Organization", "td:nth-child(8) span" },
            };

            foreach (string[] column in detailColumns)
            {
                detailsBinder.AddChildBinder(column[0], typeof(HtmlPropertyBinderAction), column[1]);
            }
        }
コード例 #2
0
        /// <summary>
        /// Scrapes every document in the supplied <c>data</c> list with a dedicated
        /// browser and publishes the results to the supplied queues.
        /// </summary>
        /// <remarks>
        /// <para>
        /// For each entry the page at <c>data[i]["source"]</c> is loaded, bound to a
        /// match model, and the raw per-team statistic tables are reshaped into
        /// <c>HomeTeamStats</c> / <c>AwayTeamStats</c>. The model is pushed to
        /// <c>modelQueue</c>, and a (source URL, status) pair to <c>sourceQueue</c>:
        /// </para>
        /// <list type="bullet">
        /// <item><description><c>"1"</c>: both scores are non-blank and both team models have exactly seven properties.</description></item>
        /// <item><description><c>"2"</c>: the page was processed but one of those conditions failed.</description></item>
        /// <item><description><c>"3"</c>: processing threw; no model is queued for that entry.</description></item>
        /// </list>
        /// <para>
        /// The browser obtained from the dispatcher is always released, even when
        /// an unexpected exception escapes the loop.
        /// </para>
        /// </remarks>
        /// <param name="p">
        /// A <c>Dictionary&lt;string, object&gt;</c> containing the keys
        /// <c>sourceQueue</c>, <c>modelQueue</c>, <c>data</c> and <c>dispatcher</c>.
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="p"/> is not a <c>Dictionary&lt;string, object&gt;</c>.
        /// </exception>
        public void ThreadStart(object p)
        {
            Dictionary<string, object> parameters = p as Dictionary<string, object>;
            if (parameters == null)
            {
                throw new ArgumentException("Expected a Dictionary<string, object> holding the worker parameters.", "p");
            }

            ConcurrentQueue<Tuple<string, string>> sourceQueue = (ConcurrentQueue<Tuple<string, string>>)parameters["sourceQueue"];
            ConcurrentQueue<object> modelQueue = (ConcurrentQueue<object>)parameters["modelQueue"];
            List<BsonDocument> data = (List<BsonDocument>)parameters["data"];
            WebScraperDispatcher dispatcher = (WebScraperDispatcher)parameters["dispatcher"];

            // Lineup, Substitutes, Coach, Goals, Cards, SubstitutionOff, SubstitutionOn.
            const int expectedStatPropertyCount = 7;

            WebScraperBrowser browser = dispatcher.AddBrowser();
            try
            {
                ModelPropertyBinder matchInfo = CreateMatchInfoBinder();

                for (int i = 0; i < data.Count; i++)
                {
                    object matchModel = null;
                    Tuple<string, string> sourceModel = null;
                    string sourceUrl = null;
                    try
                    {
                        // NOTE(review): scrapCount / exceptionCount / totalToScrap are declared outside
                        // this method. If several workers share them, these increments are not
                        // synchronized here; confirm against the declarations.
                        scrapCount++;
                        Console.WriteLine(scrapCount + "/" + totalToScrap);

                        // Read the URL once so the failure path never has to touch the document again.
                        sourceUrl = data[i]["source"].AsString;

                        dispatcher.Navigate(browser.ID, sourceUrl);
                        matchInfo.SetDocument(browser.Document);
                        matchInfo.Execute();
                        matchModel = matchInfo.Model;

                        var model = matchInfo.Model as IContentBindingModel;
                        var awayStats = TransformTeamStats(model["AwayTeamTempStats"] as IContentBindingCollection);
                        model["AwayTeamStats"] = awayStats;
                        var homeStats = TransformTeamStats(model["HomeTeamTempStats"] as IContentBindingCollection);
                        model["HomeTeamStats"] = homeStats;
                        model.RemoveProperty("AwayTeamTempStats");
                        model.RemoveProperty("HomeTeamTempStats");

                        var htScore = model["HomeTeamScore"];
                        var atScore = model["AwayTeamScore"];
                        bool incomplete =
                            htScore == null || string.IsNullOrWhiteSpace(htScore.ToString())
                            || atScore == null || string.IsNullOrWhiteSpace(atScore.ToString())
                            || awayStats.Properties.Count != expectedStatPropertyCount
                            || homeStats.Properties.Count != expectedStatPropertyCount;

                        sourceModel = new Tuple<string, string>(sourceUrl, incomplete ? "2" : "1");
                    }
                    catch (Exception ex)
                    {
                        exceptionCount++;
                        matchModel = null;

                        // Leave a trace instead of silently discarding the failure.
                        Console.Error.WriteLine("Failed to scrape '" + sourceUrl + "': " + ex.Message);

                        // Without a readable URL there is nothing to report back to the source queue.
                        sourceModel = sourceUrl != null ? new Tuple<string, string>(sourceUrl, "3") : null;
                    }
                    finally
                    {
                        if (matchModel != null)
                        {
                            modelQueue.Enqueue(matchModel);
                        }

                        if (sourceModel != null)
                        {
                            sourceQueue.Enqueue(sourceModel);
                        }
                    }
                }
            }
            finally
            {
                // Always hand the browser back, even if the loop is aborted by an unexpected exception.
                dispatcher.Dispose(browser);
            }
        }

        /// <summary>Builds the binder tree for a match page: header fields plus raw home/away statistic tables.</summary>
        private static ModelPropertyBinder CreateMatchInfoBinder()
        {
            const string headerIdPrefix = "#ctl00_MPane_m_29_194_ctnr_m_29_194_MacBilgiDisplay1_dtMacBilgisi_";
            const string statsRowSelector = "#ctl00_MPane_m_29_194_ctnr_div > table > tbody > tr > td:nth-child(2) > table > tbody > tr:nth-child(2)";

            ModelPropertyBinder matchInfo = new ModelPropertyBinder();
            matchInfo.AddChildBinder("HomeTeam", typeof(HtmlPropertyBinderAction), headerIdPrefix + "lnkTakim1");
            matchInfo.AddChildBinder("AwayTeam", typeof(HtmlPropertyBinderAction), headerIdPrefix + "lnkTakim2");
            matchInfo.AddChildBinder("HomeTeamScore", typeof(HtmlPropertyBinderAction), headerIdPrefix + "lblTakim1Skor");
            matchInfo.AddChildBinder("AwayTeamScore", typeof(HtmlPropertyBinderAction), headerIdPrefix + "Label12");
            matchInfo.AddChildBinder("Organization", typeof(HtmlPropertyBinderAction), headerIdPrefix + "lblOrganizasyonAdi");
            matchInfo.AddChildBinder("MatchDateTime", typeof(HtmlPropertyBinderAction), headerIdPrefix + "lblTarih");

            // Home statistics are in the first cell of the row, away statistics in the third.
            AddTeamStatsBinder(matchInfo, "HomeTeamTempStats", statsRowSelector + " > td:nth-child(1) > table");
            AddTeamStatsBinder(matchInfo, "AwayTeamTempStats", statsRowSelector + " > td:nth-child(3) > table");

            return matchInfo;
        }

        /// <summary>Registers one team's raw statistic tables (stat name plus player cells) under <paramref name="propertyName"/>.</summary>
        private static void AddTeamStatsBinder(ModelPropertyBinder matchInfo, string propertyName, string tableSelector)
        {
            var statistics = matchInfo
                .AddChildBinder(propertyName, typeof(CollectionPropertyBinderAction), tableSelector)
                .AddChildBinder("", typeof(ModelPropertyBinderAction), "");
            statistics.AddChildBinder("StatName", typeof(HtmlPropertyBinderAction), ".MacDetayMiniBaslik span");

            var player = statistics
                .AddChildBinder("Players", typeof(CollectionPropertyBinderAction), ".GriBorder tr td")
                .AddChildBinder("", typeof(ModelPropertyBinderAction), "");
            player.AddChildBinder("PlayerName", typeof(HtmlPropertyBinderAction), "a");
            player.AddChildBinder("Info", typeof(HtmlPropertyBinderAction), "span");
            player.AddChildBinder("Info2", typeof(AttributePropertyBinderAction), "img", "alt");
        }

        /// <summary>
        /// Reshapes one team's raw statistic tables into a model keyed by Lineup, Substitutes,
        /// Coach, Goals, Cards, SubstitutionOff and SubstitutionOn. Tables with any other
        /// heading are ignored; an empty Goals collection is added when none was found.
        /// </summary>
        private static IContentBindingModel TransformTeamStats(IContentBindingCollection collection)
        {
            IContentBindingModel model = ModelPropertyBinderElementFactory.DefaultElementFactory.CreateContentBindingModel();

            foreach (var item in collection)
            {
                var statModel = item as IContentBindingModel;
                var players = statModel["Players"] as IContentBindingCollection;
                var stat = statModel["StatName"].ToString().Trim();

                // Headings are the Turkish captions as they appear on the page.
                switch (stat)
                {
                    case "İlk 11":
                        model["Lineup"] = BuildPlayerCollection(players, FillSquadPlayer);
                        break;
                    case "Yedekler":
                        model["Substitutes"] = BuildPlayerCollection(players, FillSquadPlayer);
                        break;
                    case "Teknik Sorumlu":
                        if (players.Count > 0)
                        {
                            model["Coach"] = (players[0] as IContentBindingModel)["PlayerName"];
                        }
                        break;
                    case "Goller":
                        model["Goals"] = BuildPlayerCollection(players, FillGoal);
                        break;
                    case "Kartlar":
                        model["Cards"] = BuildPlayerCollection(players, FillCard);
                        break;
                    case "Oyundan Çıkanlar":
                        model["SubstitutionOff"] = BuildPlayerCollection(players, FillSubstitution);
                        break;
                    case "Oyuna Girenler":
                        model["SubstitutionOn"] = BuildPlayerCollection(players, FillSubstitution);
                        break;
                    default:
                        break;
                }
            }

            // Goals is always present so a goalless match still has the full set of properties.
            if (!model.Properties.Contains("Goals"))
            {
                model["Goals"] = ModelPropertyBinderElementFactory.DefaultElementFactory.CreateContentBindingCollection();
            }

            return model;
        }

        /// <summary>Creates a new collection with one freshly created model per raw player entry, populated by <paramref name="fill"/>.</summary>
        private static IContentBindingCollection BuildPlayerCollection(IContentBindingCollection players, Action<IContentBindingModel, IContentBindingModel> fill)
        {
            var result = ModelPropertyBinderElementFactory.DefaultElementFactory.CreateContentBindingCollection();
            foreach (var entry in players)
            {
                var player = ModelPropertyBinderElementFactory.DefaultElementFactory.CreateContentBindingModel();
                fill(entry as IContentBindingModel, player);
                result.Add(player);
            }

            return result;
        }

        /// <summary>Lineup/substitute entry: Number (Info with dots removed) and Name.</summary>
        private static void FillSquadPlayer(IContentBindingModel source, IContentBindingModel player)
        {
            player["Number"] = source["Info"].ToString().Trim().Replace(".", "");
            player["Name"] = source["PlayerName"];
        }

        /// <summary>Goal entry: splits "name,minute" at the last comma after dropping any parenthesised part.</summary>
        private static void FillGoal(IContentBindingModel source, IContentBindingModel player)
        {
            var text = Regex.Replace(source["PlayerName"].ToString(), "\\(.+\\)", "");
            int separator = text.LastIndexOf(',');
            if (separator < 0)
            {
                // No minute part: keep the name rather than failing the whole match on Substring(0, -1).
                player["Name"] = text.Trim();
                player["Time"] = string.Empty;
                return;
            }

            player["Name"] = text.Substring(0, separator).Trim();
            player["Time"] = StripMinuteSuffix(text.Substring(separator + 1));
        }

        /// <summary>Card entry: Name, Time (from Info) and Type (from Info2, the image alt text).</summary>
        private static void FillCard(IContentBindingModel source, IContentBindingModel player)
        {
            var info = source["Info"].ToString();
            var cardType = source["Info2"].ToString();
            player["Name"] = source["PlayerName"];
            player["Time"] = StripMinuteSuffix(info);
            player["Type"] = cardType;
        }

        /// <summary>Substitution entry: Name and Time (from Info).</summary>
        private static void FillSubstitution(IContentBindingModel source, IContentBindingModel player)
        {
            var info = source["Info"].ToString();
            player["Name"] = source["PlayerName"];
            player["Time"] = StripMinuteSuffix(info);
        }

        /// <summary>Removes every ".dk" occurrence (presumably the minute marker; confirm against the page) and trims the result.</summary>
        private static string StripMinuteSuffix(string text)
        {
            return Regex.Replace(text, "\\.dk", "").Trim();
        }