/// <summary>
/// Builds an <c>XpathSelector</c> from the document content and the selector file,
/// runs the selection and returns the resulting rows with blank rows filtered out
/// and duplicates removed.
/// </summary>
/// <param name="fileInput">
/// Supplies <c>Xpath</c>, which is handed to <c>XElement.Load</c> to load the selector definition.
/// </param>
/// <param name="document">XML document whose <c>InnerXml</c> is parsed and queried.</param>
/// <returns>
/// A table holding the non-blank selected rows after
/// <c>XmlHelper.RemoveDuplicatesFromDataTable</c> has processed it. When no row
/// survives the blank filter, an empty table with the schema of the selection is used.
/// </returns>
DataTable loadDataTableFromSelection(FileHelper fileInput, XmlDocument document)
{
    XpathSelector selection = new XpathSelector(
        XElement.Parse(document.InnerXml),
        XElement.Load(fileInput.Xpath));
    DataTable selected = selection.SelectData();

    // A row is dropped when every field is DBNull or fails the non-blank string check.
    // Note that `field as string` is null for non-string values, so such values
    // count as blank here as well.
    DataRow[] nonBlankRows = selected.Rows.Cast<DataRow>()
        .Where(row => !row.ItemArray.All(field => field is DBNull || string.IsNullOrWhiteSpace(field as string)))
        .ToArray();

    // CopyToDataTable throws InvalidOperationException when the source has no rows,
    // so fall back to an empty table that keeps the selection's columns.
    DataTable result = nonBlankRows.Length == 0
        ? selected.Clone()
        : nonBlankRows.CopyToDataTable();

    // NOTE(review): assumes RemoveDuplicatesFromDataTable accepts an empty table - confirm.
    XmlHelper.RemoveDuplicatesFromDataTable(ref result);
    return result;
}
/// <summary>
/// Parses a forecast post page: stores the matches listed in the article body that are
/// not known yet, then stores the forecasts found in each comment.
/// </summary>
/// <param name="html">HTML of the page; both the article and the comment nodes are read from it.</param>
/// <param name="url">
/// Written to the log only when the number of comments differs from the expected page size.
/// </param>
/// <returns>
/// A report: one br-prefixed line per match found in the article, followed by per-user
/// warnings and the totals of parsed comments and added forecasts.
/// </returns>
/// <remarks>
/// <c>First()</c> throws <see cref="InvalidOperationException"/> when the page has no
/// matching article element.
/// </remarks>
public string ParseHtml(string html, string url = "")
{
    // Expected sizes; a deviation is only reported, never treated as an error.
    const int expectedForecastsPerUser = 8;
    const int expectedCommentsPerPage = 25;

    var article = XpathSelector.Get(html, "//article[contains(@class, 'b-singlepost-body')]").First();

    // Normalise team names before matching (replacements are applied in this order).
    var innerHtml = article.InnerHtml
        .Replace("-Москва", "")
        .Replace("ПФК", "")
        .Replace("СКА-Хабаровск", "СКА Хабаровск");

    MatchCollection matches = new Regex(@">\s*(?<number>\d+)[.:]\s*(\w*\s+)?(?<owner>[А-Яа-я]+)\s*(\(\w*\))?\s*[:-]\s*(?<guest>[А-Яа-я]+)").Matches(innerHtml);

    string resultMessage = "";

    using (IUnitOfWork uow = unitOfWorkFactory.Create())
    {
        foreach (System.Text.RegularExpressions.Match regexMatch in matches)
        {
            var number = int.Parse(regexMatch.Groups["number"].Value);
            var owners = regexMatch.Groups["owner"].Value;
            var guests = regexMatch.Groups["guest"].Value;

            var match = queryFactory.GetMatchByNumber(number).Execute();
            if (match == null)
            {
                // Unknown match: make sure both teams exist, then store the match itself.
                var guestCommand = queryFactory.GetCommandByName(guests).Execute();
                if (guestCommand == null)
                {
                    uow.Save(guestCommand = new Command { Name = guests });
                }

                var ownerCommand = queryFactory.GetCommandByName(owners).Execute();
                if (ownerCommand == null)
                {
                    uow.Save(ownerCommand = new Command { Name = owners });
                }

                uow.Save(new Match { Number = number, Owners = ownerCommand, Guests = guestCommand });
            }

            resultMessage += string.Format("<br/>{0}. {1} - {2}", number, owners, guests);
        }

        uow.Commit();
    }

    resultMessage += "<br/>";

    List<HtmlNode> comments = XpathSelector.Get(html, "//div[contains(@class, 'b-tree-twig')]").ToList();
    int forecastCount = 0;

    foreach (HtmlNode comment in comments)
    {
        var nameNode = XpathSelector.Get(comment.OuterHtml, "//a/b/text()").FirstOrDefault();
        if (nameNode == null)
        {
            continue;
        }

        string name = nameNode.InnerText;

        var contentNode = XpathSelector.Get(comment.OuterHtml, "//div[@class='b-leaf-article']").SingleOrDefault();
        if (contentNode == null)
        {
            continue;
        }

        string content = contentNode.InnerHtml;

        // Each comment is committed in its own unit of work.
        using (IUnitOfWork uow = unitOfWorkFactory.Create())
        {
            Ljuser user = queryFactory.GetLjuserByName(name).Execute() ?? new Ljuser { Name = name };
            List<Forecast> forecasts = Parse(content, user).ToList();

            if (forecasts.Count != expectedForecastsPerUser)
            {
                string message = string.Format("Юзер {0} сделал {1} прогнозов", user.Name, forecasts.Count);
                resultMessage += message + Environment.NewLine;
                log.Warn(message);
                // The comment body is scraped text, so log it verbatim instead of
                // passing it as a format string (braces in it are not placeholders).
                log.Warn(content);
            }

            if (forecasts.Any())
            {
                int addForecasts = user.AddForecasts(forecasts);
                forecastCount += addForecasts;

                if (addForecasts != expectedForecastsPerUser)
                {
                    string message = string.Format("Юзеру {0} добавлено {1} прогнозов", user.Name, addForecasts);
                    resultMessage += message + Environment.NewLine;
                    log.Warn(message);
                }

                uow.Save(user);
            }

            uow.Commit();
        }
    }

    if (comments.Count != expectedCommentsPerPage)
    {
        log.WarnFormat("на странице {0} комментариев{1}", comments.Count, Environment.NewLine);
        // The URL is data, not a format string; log it as-is.
        log.Warn(url);
    }

    resultMessage += string.Format("Распарсено {0} комментариев {1}", comments.Count, Environment.NewLine);
    resultMessage += string.Format("добавлено {0} новых прогнозов {1}", forecastCount, Environment.NewLine);
    return resultMessage;
}
/// <summary>
/// Reads match results from the rows of the <c>stat-table</c> table and records the score
/// and date on stored matches that do not have a result yet.
/// </summary>
/// <param name="html">HTML of the results page.</param>
/// <returns>One line per match whose result was recorded by this call.</returns>
/// <exception cref="ArgumentException">
/// A stored match already has a result and it differs from the score on the page.
/// </exception>
/// <remarks>
/// Rows whose score cell does not contain two integer parts are skipped.
/// Rows for matches that are not stored are ignored.
/// </remarks>
public string ParseResult(string html)
{
    string resultMessage = "";
    IEnumerable<HtmlNode> matchesTr = XpathSelector.Get(html, "//table[@class='stat-table']/tbody/tr");

    using (IUnitOfWork uow = unitOfWorkFactory.Create())
    {
        // Materialised once so the per-row lookup below does not re-run the query.
        IEnumerable<Match> allMatches = queryFactory.FindAll<Match>().Execute().ToList();

        foreach (HtmlNode matchTr in matchesTr)
        {
            var scores = XpathSelector.Get(matchTr.OuterHtml, "//td[@class='score-td']//a").Single().InnerText
                .Split(new[] { ":" }, StringSplitOptions.RemoveEmptyEntries);

            // A usable score needs both sides. With fewer than two parts the first and
            // last element coincide (or do not exist), so such rows are skipped together
            // with rows whose home score is not a number.
            int ownersGoals;
            if (scores.Length < 2 || !int.TryParse(scores[0], out ownersGoals))
            {
                continue;
            }

            var owners = XpathSelector.Get(matchTr.OuterHtml, "//td[@class='owner-td']//a").Single().InnerText.Replace(" Москва", "");
            var guests = XpathSelector.Get(matchTr.OuterHtml, "//td[@class='guests-td']//a").Single().InnerText.Replace(" Москва", "");
            var dateString = XpathSelector.Get(matchTr.OuterHtml, "//td[@class='name-td alLeft']").Single().InnerText;

            Match matchFromDb = allMatches.SingleOrDefault(x => x.Guests.Name == guests && x.Owners.Name == owners);
            if (matchFromDb == null)
            {
                continue;
            }

            // Treat an unparseable away score the same way as an unparseable home score.
            int guestsGoals;
            if (!int.TryParse(scores.Last(), out guestsGoals))
            {
                continue;
            }

            // NOTE(review): parsed with the current culture - confirm the page's date format.
            DateTime date = DateTime.Parse(dateString.Replace("|", " "));

            if (matchFromDb.OwnersGoals.HasValue && matchFromDb.GuestsGoals.HasValue)
            {
                // The match was looked up by both team names, so only the goals can differ.
                if (matchFromDb.GuestsGoals != guestsGoals || matchFromDb.OwnersGoals != ownersGoals)
                {
                    throw new ArgumentException(string.Format("Результаты матча {0} не совпадают!", owners + "-" + guests));
                }
            }
            else
            {
                matchFromDb.GuestsGoals = guestsGoals;
                matchFromDb.OwnersGoals = ownersGoals;
                matchFromDb.Date = date;
                resultMessage += string.Format("{0}. {1} - {2} {3}:{4}{5}", matchFromDb.Number, owners, guests, ownersGoals, guestsGoals, Environment.NewLine);
            }
        }

        uow.Commit();
    }

    return resultMessage;
}