/// <summary>
/// Runs the XPath selection described by <paramref name="fileInput"/> against
/// <paramref name="document"/>, drops rows whose fields are all blank and removes duplicates.
/// </summary>
/// <param name="fileInput">Supplies, through its <c>Xpath</c> member, the location of the selector XML that is loaded.</param>
/// <param name="document">XML document whose <c>InnerXml</c> is parsed and queried.</param>
/// <returns>
/// The filtered, de-duplicated table. When no row contains any data, an empty table with the
/// same columns as the selection result is returned.
/// </returns>
DataTable loadDataTableFromSelection(FileHelper fileInput, XmlDocument document)
        {
            XpathSelector selection = new XpathSelector(XElement.Parse(document.InnerXml), XElement.Load(fileInput.Xpath));
            DataTable     result    = selection.SelectData();

            // A field is blank when it is null/DBNull or its text is empty or whitespace.
            // ToString() is used instead of "as string" so that non-string values (numbers,
            // dates, ...) are not mistaken for blanks.
            DataRow[] filledRows = result.Rows.Cast <DataRow>()
                                   .Where(row => !row.ItemArray.All(field => field == null || field is DBNull || string.IsNullOrWhiteSpace(field.ToString())))
                                   .ToArray();

            // CopyToDataTable() throws InvalidOperationException for an empty sequence,
            // so hand back an empty table with the same schema instead.
            if (filledRows.Length == 0)
            {
                return(result.Clone());
            }

            result = filledRows.CopyToDataTable();
            XmlHelper.RemoveDuplicatesFromDataTable(ref result);

            return(result);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Parses a post page: stores the matches listed in the article body (creating missing
        /// teams and matches) and then stores the forecasts found in each comment.
        /// </summary>
        /// <param name="html">Full HTML of the page; must contain an <c>article</c> element whose class includes <c>b-singlepost-body</c>.</param>
        /// <param name="url">Address of the page. Only written to the log when the comment count is unexpected.</param>
        /// <returns>
        /// A report listing the recognised matches (separated by <c>&lt;br/&gt;</c>), per-user warnings
        /// and the totals of parsed comments and added forecasts.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The article element is missing, or a comment contains more than one
        /// <c>div</c> with class <c>b-leaf-article</c>.
        /// </exception>
        public string ParseHtml(string html, string url = "")
        {
            // Expected sizes; deviations are only reported, the data is still processed.
            const int expectedForecastsPerUser = 8;
            const int expectedCommentsPerPage  = 25;

            var article = XpathSelector.Get(html, "//article[contains(@class, 'b-singlepost-body')]").First();

            // The regex below accepts team names made of Cyrillic letters only, so prefixes and
            // suffixes containing other characters are stripped or rewritten first.
            var innerHtml = article.InnerHtml
                            .Replace("-Москва", "")
                            .Replace("ПФК", "")
                            .Replace("СКА-Хабаровск", "СКА Хабаровск");

            MatchCollection matches = new Regex(@">\s*(?<number>\d+)[.:]\s*(\w*\s+)?(?<owner>[А-Яа-я]+)\s*(\(\w*\))?\s*[:-]\s*(?<guest>[А-Яа-я]+)").Matches(innerHtml);

            string resultMessage = "";

            // All new teams and matches of the article are committed together.
            using (IUnitOfWork uow = unitOfWorkFactory.Create())
            {
                foreach (System.Text.RegularExpressions.Match regexMatch in matches)
                {
                    var number = int.Parse(regexMatch.Groups["number"].Value);
                    var owners = regexMatch.Groups["owner"].Value;
                    var guests = regexMatch.Groups["guest"].Value;

                    // Only matches that are not stored yet are created.
                    var match = queryFactory.GetMatchByNumber(number).Execute();
                    if (match == null)
                    {
                        var guestCommand = queryFactory.GetCommandByName(guests).Execute();
                        if (guestCommand == null)
                        {
                            guestCommand = new Command { Name = guests };
                            uow.Save(guestCommand);
                        }

                        var ownerCommand = queryFactory.GetCommandByName(owners).Execute();
                        if (ownerCommand == null)
                        {
                            ownerCommand = new Command { Name = owners };
                            uow.Save(ownerCommand);
                        }

                        uow.Save(new Match
                        {
                            Number = number,
                            Owners = ownerCommand,
                            Guests = guestCommand
                        });
                    }

                    resultMessage += string.Format("<br/>{0}. {1} - {2}", number, owners, guests);
                }
                uow.Commit();
            }
            resultMessage += "<br/>";

            List <HtmlNode> comments      = XpathSelector.Get(html, "//div[contains(@class, 'b-tree-twig')]").ToList();
            int             forecastCount = 0;

            foreach (HtmlNode comment in comments)
            {
                // Comments without an author link are skipped.
                var nameNode = XpathSelector.Get(comment.OuterHtml, "//a/b/text()").FirstOrDefault();
                if (nameNode == null)
                {
                    continue;
                }

                // Comments without a body are skipped as well.
                var contentNode = XpathSelector.Get(comment.OuterHtml, "//div[@class='b-leaf-article']").SingleOrDefault();
                if (contentNode == null)
                {
                    continue;
                }

                string name    = nameNode.InnerText;
                string content = contentNode.InnerHtml;

                // Each comment is stored in its own unit of work.
                using (IUnitOfWork uow = unitOfWorkFactory.Create())
                {
                    Ljuser user = queryFactory.GetLjuserByName(name).Execute() ?? new Ljuser {
                        Name = name
                    };

                    List <Forecast> forecasts = Parse(content, user).ToList();

                    if (forecasts.Count != expectedForecastsPerUser)
                    {
                        string message = string.Format("Юзер {0} сделал {1} прогнозов", user.Name, forecasts.Count);
                        resultMessage += message + Environment.NewLine;
                        log.Warn(message);
                        // Logged as a plain message: the comment HTML is arbitrary text and must
                        // not be interpreted as a format string.
                        log.Warn(content);
                    }

                    if (forecasts.Any())
                    {
                        int addForecasts = user.AddForecasts(forecasts);
                        forecastCount += addForecasts;
                        if (addForecasts != expectedForecastsPerUser)
                        {
                            string message = string.Format("Юзеру {0} добавлено {1} прогнозов", user.Name, addForecasts);
                            resultMessage += message + Environment.NewLine;
                            log.Warn(message);
                        }
                        uow.Save(user);
                    }

                    uow.Commit();
                }
            }

            if (comments.Count != expectedCommentsPerPage)
            {
                log.WarnFormat("на странице {0} комментариев{1}", comments.Count, Environment.NewLine);
                // Plain message for the same reason as above: a URL may contain braces.
                log.Warn(url);
            }

            resultMessage += string.Format("Распарсено {0} комментариев {1}", comments.Count, Environment.NewLine);
            resultMessage += string.Format("добавлено {0} новых прогнозов {1}", forecastCount, Environment.NewLine);


            return(resultMessage);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the rows of the <c>stat-table</c> results table and writes the score and date of
        /// every played match into the corresponding stored match.
        /// </summary>
        /// <param name="html">HTML containing the results table.</param>
        /// <returns>One line per match whose result was newly recorded; empty when nothing changed.</returns>
        /// <exception cref="ArgumentException">A stored result differs from the result on the page.</exception>
        /// <exception cref="InvalidOperationException">
        /// A row lacks exactly one score, owner, guest or date cell, or several stored matches
        /// share the same owner and guest names.
        /// </exception>
        /// <exception cref="FormatException">The date cell of a matched row cannot be parsed.</exception>
        public string ParseResult(string html)
        {
            string resultMessage             = "";
            IEnumerable <HtmlNode> matchesTr = XpathSelector.Get(html, "//table[@class='stat-table']/tbody/tr");

            using (IUnitOfWork uow = unitOfWorkFactory.Create())
            {
                IEnumerable <Match> allMatches = queryFactory.FindAll <Match>().Execute().ToList();
                foreach (HtmlNode matchTr in matchesTr)
                {
                    string[] scores = XpathSelector.Get(matchTr.OuterHtml, "//td[@class='score-td']//a").Single().InnerText.Split(new[] { ":" }, StringSplitOptions.RemoveEmptyEntries);

                    // A row counts as played only when both sides of the score are numbers.
                    // Anything else (empty cell, a single number, placeholders) is skipped
                    // instead of throwing or reusing the owner's score for the guests.
                    int ownersGoals;
                    int guestsGoals;
                    if (scores.Length < 2 ||
                        !int.TryParse(scores[0], out ownersGoals) ||
                        !int.TryParse(scores[scores.Length - 1], out guestsGoals))
                    {
                        continue;
                    }

                    var owners     = XpathSelector.Get(matchTr.OuterHtml, "//td[@class='owner-td']//a").Single().InnerText.Replace(" Москва", "");
                    var guests     = XpathSelector.Get(matchTr.OuterHtml, "//td[@class='guests-td']//a").Single().InnerText.Replace(" Москва", "");
                    var dateString = XpathSelector.Get(matchTr.OuterHtml, "//td[@class='name-td alLeft']").Single().InnerText;

                    // Rows without a stored match are ignored.
                    Match matchFromDb = allMatches.SingleOrDefault(x => x.Guests.Name == guests && x.Owners.Name == owners);
                    if (matchFromDb == null)
                    {
                        continue;
                    }

                    // NOTE(review): parsed with the current culture - confirm it matches the page's date format.
                    DateTime date = DateTime.Parse(dateString.Replace("|", " "));

                    if (matchFromDb.OwnersGoals.HasValue && matchFromDb.GuestsGoals.HasValue)
                    {
                        // The match was found by its team names, so only the scores can differ.
                        if (matchFromDb.GuestsGoals != guestsGoals ||
                            matchFromDb.OwnersGoals != ownersGoals)
                        {
                            throw new ArgumentException(string.Format("Результаты матча {0} не совпадают!", owners + "-" + guests));
                        }
                    }
                    else
                    {
                        matchFromDb.GuestsGoals = guestsGoals;
                        matchFromDb.OwnersGoals = ownersGoals;
                        matchFromDb.Date        = date;

                        resultMessage += string.Format("{0}. {1} - {2} {3}:{4}{5}", matchFromDb.Number, owners, guests, ownersGoals, guestsGoals, Environment.NewLine);
                    }
                }
                uow.Commit();
            }
            return(resultMessage);
        }