/// <summary>
/// Tries to read one forecast from a single line of comment markup and, when three
/// numbers are found, appends it to <paramref name="result"/>.
/// </summary>
/// <param name="user">User passed to the <see cref="Forecast"/> constructor.</param>
/// <param name="result">List that receives the forecast on success.</param>
/// <param name="line">One line of comment markup; tag-like fragments are stripped first.</param>
private static void GetValue(Ljuser user, List<Forecast> result, string line)
{
    // Remove everything that looks like an HTML tag before looking for numbers.
    string text = new Regex("<.*?>").Replace(line, string.Empty);

    // The sibling overload GetValue(string, out string) is defined outside this view;
    // presumably it returns the next number and hands back the unread remainder — confirm.
    string rest;

    int? matchNumber = GetValue(text, out rest);
    if (!matchNumber.HasValue)
    {
        return;
    }

    int? owners = GetValue(rest, out rest);
    if (!owners.HasValue)
    {
        return;
    }

    int? guests = GetValue(rest, out rest);
    if (guests.HasValue)
    {
        result.Add(new Forecast(user)
        {
            GuestsGoals = guests.Value,
            Number = matchNumber.Value,
            OwnersGoals = owners.Value
        });
        return;
    }

    // Only two numbers were found. Retry with every '6' turned into ':'
    // (presumably a mistyped score separator — confirm). The retry cannot loop:
    // the replaced text no longer contains a '6'.
    if (text.Contains("6"))
    {
        GetValue(user, result, text.Replace("6", ":"));
    }
}
/// <summary>
/// Extracts the forecasts contained in the HTML of one comment.
/// </summary>
/// <param name="content">Inner HTML of the comment.</param>
/// <param name="user">User the parsed forecasts are created for.</param>
/// <returns>The forecasts that could be read, in the order their lines appear.</returns>
public static IEnumerable<Forecast> Parse(string content, Ljuser user)
{
    // Cut out every <p ...><font ...><a ...>...</a>...</font>...</p> fragment
    // (presumably quoted/author markup — confirm), then turn '!' into '1'.
    Regex linkedParagraph = new Regex("\\<p.*?\\<font.*?\\<a.*?a\\>.*?font\\>.*?p\\>");
    string cleaned = linkedParagraph.Replace(content, "");
    cleaned = cleaned.Replace("!", "1");

    // Each forecast is expected on its own line, lines being separated by br tags.
    string[] lineBreaks = { "<br>", "<br />" };
    string[] pieces = cleaned.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);

    var forecasts = new List<Forecast>();
    for (int i = 0; i < pieces.Length; i++)
    {
        GetValue(user, forecasts, pieces[i]);
    }

    return forecasts;
}
/// <summary>
/// Parses one post page: registers the matches listed in the article body, then reads
/// the forecasts left in the comments and stores them per user.
/// </summary>
/// <param name="html">Full HTML of the page.</param>
/// <param name="url">Page address; only written to the log when the number of comments is unexpected.</param>
/// <returns>
/// A report containing the recognised matches, per-user warnings, the number of parsed
/// comments and the number of forecasts added.
/// </returns>
/// <remarks>
/// The article node is taken with <c>First()</c>, so a page without a matching
/// <c>article</c> element is expected to fail there rather than return an empty report.
/// </remarks>
public string ParseHtml(string html, string url = "")
{
    // A count different from these only produces warnings; processing continues.
    const int expectedForecastsPerUser = 8;
    const int expectedCommentsPerPage = 25;

    var article = XpathSelector.Get(html, "//article[contains(@class, 'b-singlepost-body')]").First();

    // Normalise names so that each one matches the single-word [А-Яа-я]+ groups below.
    var innerHtml = article.InnerHtml
        .Replace("-Москва", "")
        .Replace("ПФК", "")
        .Replace("СКА-Хабаровск", "СКА Хабаровск");

    // "<number>. [word] <owner> [(word)] - <guest>" directly after a closing '>'.
    MatchCollection matches = new Regex(@">\s*(?<number>\d+)[.:]\s*(\w*\s+)?(?<owner>[А-Яа-я]+)\s*(\(\w*\))?\s*[:-]\s*(?<guest>[А-Яа-я]+)").Matches(innerHtml);

    string resultMessage = "";
    using (IUnitOfWork uow = unitOfWorkFactory.Create())
    {
        // Fully qualified: the unqualified name Match is the project entity created below.
        foreach (System.Text.RegularExpressions.Match regexMatch in matches)
        {
            var number = int.Parse(regexMatch.Groups["number"].Value);
            var owners = regexMatch.Groups["owner"].Value;
            var guests = regexMatch.Groups["guest"].Value;

            var match = queryFactory.GetMatchByNumber(number).Execute();
            if (match == null)
            {
                // Unknown match: make sure both sides exist, then store the match itself.
                var guestCommand = queryFactory.GetCommandByName(guests).Execute();
                if (guestCommand == null)
                {
                    guestCommand = new Command { Name = guests };
                    uow.Save(guestCommand);
                }

                var ownerCommand = queryFactory.GetCommandByName(owners).Execute();
                if (ownerCommand == null)
                {
                    ownerCommand = new Command { Name = owners };
                    uow.Save(ownerCommand);
                }

                uow.Save(new Match { Number = number, Owners = ownerCommand, Guests = guestCommand });
            }

            resultMessage += string.Format("<br/>{0}. {1} - {2}", number, owners, guests);
        }

        uow.Commit();
    }

    resultMessage += "<br/>";

    List<HtmlNode> comments = XpathSelector.Get(html, "//div[contains(@class, 'b-tree-twig')]").ToList();
    int forecastCount = 0;
    foreach (HtmlNode comment in comments)
    {
        // Comments without an author name node are skipped.
        var nameNode = XpathSelector.Get(comment.OuterHtml, "//a/b/text()").FirstOrDefault();
        if (nameNode == null)
        {
            continue;
        }

        string name = nameNode.InnerText;

        var bodyNode = XpathSelector.Get(comment.OuterHtml, "//div[@class='b-leaf-article']").SingleOrDefault();
        if (bodyNode == null)
        {
            continue;
        }

        string content = bodyNode.InnerHtml;

        // Each comment is processed in its own unit of work.
        using (IUnitOfWork uow = unitOfWorkFactory.Create())
        {
            Ljuser user = queryFactory.GetLjuserByName(name).Execute() ?? new Ljuser { Name = name };
            List<Forecast> forecasts = Parse(content, user).ToList();

            if (forecasts.Count != expectedForecastsPerUser)
            {
                string message = string.Format("Юзер {0} сделал {1} прогнозов", user.Name, forecasts.Count);
                resultMessage += message + Environment.NewLine;
                log.Warn(message);

                // Logged verbatim: the comment HTML is arbitrary text and must not be
                // interpreted as a composite format string (braces would break it).
                log.Warn(content);
            }

            if (forecasts.Any())
            {
                int addForecasts = user.AddForecasts(forecasts);
                forecastCount += addForecasts;
                if (addForecasts != expectedForecastsPerUser)
                {
                    string message = string.Format("Юзеру {0} добавлено {1} прогнозов", user.Name, addForecasts);
                    resultMessage += message + Environment.NewLine;
                    log.Warn(message);
                }

                uow.Save(user);
            }

            uow.Commit();
        }
    }

    if (comments.Count != expectedCommentsPerPage)
    {
        log.WarnFormat("на странице {0} комментариев{1}", comments.Count, Environment.NewLine);

        // Logged verbatim for the same reason as the comment HTML above.
        log.Warn(url);
    }

    resultMessage += string.Format("Распарсено {0} комментариев {1}", comments.Count, Environment.NewLine);
    resultMessage += string.Format("добавлено {0} новых прогнозов {1}", forecastCount, Environment.NewLine);
    return resultMessage;
}