Esempio n. 1
0
        /// <summary>
        /// Collects the paragraphs found in each linked document, keeping at most
        /// <c>SearchSettings.MaxParagraphs</c> paragraphs per document.
        /// </summary>
        /// <param name="documentLinks">Links of the documents to read.</param>
        /// <returns>
        /// The non-null paragraphs of every valid document, in the order the links were supplied.
        /// Documents that are not valid, or whose paragraph extraction raises
        /// <see cref="ArgumentNullException"/>, contribute nothing.
        /// </returns>
        public IEnumerable<string> _generateParagraphsFromLinks(IEnumerable<string> documentLinks)
        {
            var siteData = new List<string>();

            foreach (string documentLink in documentLinks)
            {
                var document = new DocumentNodeSelector(documentLink);

                if (!document.IsDocumentValid)
                {
                    continue;
                }

                try
                {
                    // Take() caps the result without failing when the document holds fewer
                    // paragraphs than the limit. List.RemoveRange with a computed
                    // "Count - Max" count throws ArgumentOutOfRangeException in that case.
                    // ToList() materializes inside the try so a failure while enumerating
                    // cannot leave a partially appended document in siteData.
                    var paragraphs = document
                        .FindAllParagraphs()
                        .Where(paragraph => paragraph != null)
                        .Take(SearchSettings.MaxParagraphs)
                        .ToList();

                    siteData.AddRange(paragraphs);
                }
                catch (ArgumentNullException)
                {
                    // Best effort: a document whose paragraphs cannot be extracted is skipped.
                    continue;
                }

                // Any other exception propagates to the caller with its original stack trace.
            }

            return siteData;
        }
Esempio n. 2
0
        /// <summary>
        /// Invokes the vectorization / web search with a question.
        /// </summary>
        /// <remarks>
        /// Builds the search URL for the question, extracts up to
        /// <c>SearchSettings.MaxLinks</c> distinct result links from that page, gathers the
        /// paragraphs of the linked documents, and scores them against the keywords of the
        /// question.
        /// </remarks>
        /// <param name="question">Question being asked</param>
        /// <returns>Vectorized answers</returns>
        public Dictionary<string, double> Invoke(string question)
        {
            string domainRootUrl = SearchDomain.GenerateRootUrl(question);
            var searchDomainDocument = new DocumentNodeSelector(domainRootUrl);

            // Take() limits the number of links without throwing when the search page yields
            // fewer than MaxLinks results. List.RemoveRange with a computed "Count - Max"
            // count throws ArgumentOutOfRangeException in that case.
            var documentLinks = searchDomainDocument
                .FindAllLinks()
                .Select(link => SearchDomain.ParseSearchUrl(link))
                .Where(link => link != null)
                .Distinct()
                .Take(SearchSettings.MaxLinks)
                .ToList();

            var originalKeywords = WebSearchHelper.SplitSenteceToKeywords(question);

            var paragraphs = _generateParagraphsFromLinks(documentLinks);
            var scoredKeywords = _createScoredKeywords(paragraphs, originalKeywords);

            return _createScoredStatements(paragraphs, scoredKeywords);
        }