Пример #1
0
        /// <summary>
        /// Extracts the search results from a parsed results page and appends one
        /// <c>SearchResult</c> per result to <paramref name="association"/>'s <c>Results</c>.
        /// </summary>
        /// <param name="association">Rule the results belong to; its <c>Rule</c> text is logged and its <c>Results</c> collection receives the parsed entries.</param>
        /// <param name="searchTerm">Term that produced this page; logged and stored as <c>UsedTerm</c> on every result.</param>
        /// <param name="doc">Parsed HTML document of the results page.</param>
        private static void ParseResults(AssociationRule association, string searchTerm, HtmlDocument doc)
        {
            var nodes = doc.DocumentNode.SelectNodes("//div[@class = 'SearchResults-documents']/div/div");

            if (nodes == null)
            {
                // No result nodes matched: treated as "no results" and nothing is added.
                // A stricter check was sketched here in the past but left disabled: throw
                // when the page also lacks the "não encontrou nenhum documento"
                // (no document found) message, since that may indicate an IP block.
                return;
            }

            Console.WriteLine($"{association.Rule} - {searchTerm}: {nodes.Count} results");

            foreach (var node in nodes)
            {
                var nodeTitle      = node.SelectSingleNode("./h2/a");
                var nodePublicacao = node.SelectSingleNode("./div[@class = 'BaseSnippetWrapper-publisher']/span[@class = 'BaseSnippetWrapper-highlight-date']");
                var nodeBody       = node.SelectSingleNode("./div[@class = 'BaseSnippetWrapper-body']");

                // A block without a title link carries neither title nor URL, so it is
                // skipped instead of crashing the whole page with a NullReferenceException.
                if (nodeTitle == null)
                {
                    continue;
                }

                // The date text is expected to look like "<label>: <date>"; the part after
                // the first colon is kept. If there is no colon the date is left empty
                // rather than indexing past the end of the split array.
                var dateParts = nodePublicacao != null ? nodePublicacao.InnerText.Split(':') : null;
                var publishedOn = dateParts != null && dateParts.Length > 1
                    ? dateParts[1].Trim()
                    : string.Empty;

                var result = new SearchResult
                {
                    Title          = nodeTitle.InnerText,
                    Url            = nodeTitle.GetAttributeValue("href", string.Empty),
                    DataPublicacao = publishedOn,
                    // The body snippet is optional, same as the publication date.
                    Abstract       = nodeBody?.InnerText ?? string.Empty,
                    UsedTerm       = searchTerm
                };

                association.Results.Add(result);
            }
        }
Пример #2
0
        /// <summary>
        /// Builds an <c>AssociationRule</c> from one already-split record.
        /// </summary>
        /// <param name="fields">
        /// Record columns, read by position: [0] rule text, [1] support, [2] confidence,
        /// [3] lift, [4] count. Numeric columns are parsed with the invariant culture.
        /// </param>
        /// <param name="timeStamp">Value copied unchanged into the rule's <c>TimeStamp</c>.</param>
        /// <returns>The populated rule.</returns>
        private static AssociationRule ParseAssociationRule(string[] fields, int timeStamp)
        {
            var ruleText = fields[0];

            // The rule text is cut at every '>': piece 0 holds the comma-separated
            // members, piece 1 holds the principal item.
            var sides = ruleText.Split('>');

            // Only the digits of each item are kept.
            var principal = Regex.Replace(sides[1], @"\D", string.Empty);

            var rawMembers = sides[0].Split(',');
            var members = new string[rawMembers.Length];
            for (var i = 0; i < rawMembers.Length; i++)
            {
                members[i] = Regex.Replace(rawMembers[i], @"\D", string.Empty);
            }

            var support    = double.Parse(fields[1], CultureInfo.InvariantCulture);
            var confidence = double.Parse(fields[2], CultureInfo.InvariantCulture);
            var lift       = double.Parse(fields[3], CultureInfo.InvariantCulture);
            var count      = int.Parse(fields[4], CultureInfo.InvariantCulture);

            return new AssociationRule
            {
                Principal  = principal,
                Members    = members,
                Rule       = ruleText,
                Support    = support,
                Confidence = confidence,
                Lift       = lift,
                Count      = count,
                TimeStamp  = timeStamp
            };
        }