/// <summary>
/// Reads the search hits out of a result page and appends one <c>SearchResult</c> per hit
/// to <paramref name="association"/>'s <c>Results</c> collection.
/// </summary>
/// <param name="association">Rule whose <c>Results</c> collection receives the parsed hits.</param>
/// <param name="searchTerm">Term that produced the page; stored on every result as <c>UsedTerm</c>.</param>
/// <param name="doc">Parsed HTML of the search result page.</param>
private static void ParseResults(AssociationRule association, string searchTerm, HtmlDocument doc)
{
    var nodes = doc.DocumentNode.SelectNodes("//div[@class = 'SearchResults-documents']/div/div");
    if (nodes == null)
    {
        // SelectNodes yields null when nothing matches, so there is nothing to add.
        // NOTE(review): an earlier, disabled check threw here ("unexpected error, possible IP block")
        // whenever the page also lacked the 'não encontrou nenhum documento' message.
        // It is intentionally left disabled; re-enable it if silent empty pages become a problem.
        return;
    }

    Console.WriteLine($"{association.Rule} - {searchTerm}: {nodes.Count} results");

    foreach (var node in nodes)
    {
        var nodeTitle = node.SelectSingleNode("./h2/a");
        if (nodeTitle == null)
        {
            // Not every matched container is guaranteed to hold a title link;
            // without it there is neither a title nor a URL to record, so skip the entry.
            continue;
        }

        var nodePublicacao = node.SelectSingleNode("./div[@class = 'BaseSnippetWrapper-publisher']/span[@class = 'BaseSnippetWrapper-highlight-date']");
        var nodeBody = node.SelectSingleNode("./div[@class = 'BaseSnippetWrapper-body']");

        var dataPublicacao = string.Empty;
        if (nodePublicacao != null)
        {
            // The value is taken from the segment after the first ':'.
            // Fall back to the whole text when no ':' is present instead of indexing out of range.
            var dateParts = nodePublicacao.InnerText.Split(':');
            dataPublicacao = (dateParts.Length > 1 ? dateParts[1] : dateParts[0]).Trim();
        }

        var result = new SearchResult
        {
            Title = nodeTitle.InnerText,
            Url = nodeTitle.GetAttributeValue("href", string.Empty),
            DataPublicacao = dataPublicacao,
            // A hit without a snippet body is still recorded, with an empty abstract.
            Abstract = nodeBody != null ? nodeBody.InnerText : string.Empty,
            UsedTerm = searchTerm
        };

        association.Results.Add(result);
    }
}
/// <summary>
/// Builds an <c>AssociationRule</c> from one record of already-split text fields.
/// </summary>
/// <param name="fields">
/// Record fields read by index: [0] rule text, [1] support, [2] confidence, [3] lift, [4] count.
/// Numeric fields are parsed with the invariant culture.
/// </param>
/// <param name="timeStamp">Value copied verbatim into the rule's <c>TimeStamp</c>.</param>
/// <returns>The populated rule.</returns>
private static AssociationRule ParseAssociationRule(string[] fields, int timeStamp)
{
    const string nonDigitPattern = @"\D";

    var ruleText = fields[0];

    // The rule text is cut at '>' : the part before it lists the members, the part after it is the principal.
    var sides = ruleText.Split('>');

    // Only the digits of each item are kept.
    var principal = Regex.Replace(sides[1], nonDigitPattern, string.Empty);

    var rawMembers = sides[0].Split(',');
    var members = new string[rawMembers.Length];
    for (var i = 0; i < rawMembers.Length; i++)
    {
        members[i] = Regex.Replace(rawMembers[i], nonDigitPattern, string.Empty);
    }

    var support = double.Parse(fields[1], CultureInfo.InvariantCulture);
    var confidence = double.Parse(fields[2], CultureInfo.InvariantCulture);
    var lift = double.Parse(fields[3], CultureInfo.InvariantCulture);
    var count = int.Parse(fields[4], CultureInfo.InvariantCulture);

    return new AssociationRule
    {
        Principal = principal,
        Members = members,
        Rule = ruleText,
        Support = support,
        Confidence = confidence,
        Lift = lift,
        Count = count,
        TimeStamp = timeStamp
    };
}