/// <summary>
/// Collects paragraphs from every document referenced by the given links.
/// </summary>
/// <remarks>
/// A link is skipped when its document reports <c>IsDocumentValid == false</c>, or when
/// reading its paragraphs throws <see cref="ArgumentNullException"/>. Null paragraphs are
/// dropped, and at most <c>SearchSettings.MaxParagraphs</c> paragraphs are kept per document.
/// Any other exception propagates to the caller with its original stack trace.
/// </remarks>
/// <param name="documentLinks">Links of the documents to read paragraphs from.</param>
/// <returns>The collected paragraphs, in the order the links were visited.</returns>
public IEnumerable<string> _generateParagraphsFromLinks(IEnumerable<string> documentLinks)
{
    var siteData = new List<string>();

    foreach (string documentLink in documentLinks)
    {
        var document = new DocumentNodeSelector(documentLink);
        if (!document.IsDocumentValid)
        {
            continue;
        }

        try
        {
            // Take() caps the result without failing when the document has fewer
            // paragraphs than the limit; the previous RemoveRange(max, Count - max)
            // call threw ArgumentOutOfRangeException in that case.
            // ToList() stays inside the try so enumeration errors are handled here.
            var paragraphs = document
                .FindAllParagraphs()
                .Where(paragraph => paragraph != null)
                .Take(SearchSettings.MaxParagraphs)
                .ToList();

            siteData.AddRange(paragraphs);
        }
        catch (ArgumentNullException)
        {
            // Best effort: a document whose paragraphs cannot be read is skipped
            // rather than failing the whole collection pass.
            continue;
        }
    }

    return siteData;
}
/// <summary>
/// Invokes the vectorization / web search with a question.
/// </summary>
/// <remarks>
/// Builds the search URL for the question, extracts the distinct result links that
/// <c>SearchDomain.ParseSearchUrl</c> maps to a non-null value, and keeps at most
/// <c>SearchSettings.MaxLinks</c> of them. The paragraphs gathered from those links and
/// the keywords of the question are then scored to produce the result.
/// </remarks>
/// <param name="question">Question being asked</param>
/// <returns>Vectorized answers: scored statements keyed by statement text.</returns>
public Dictionary<string, double> Invoke(string question)
{
    string domainRootUrl = SearchDomain.GenerateRootUrl(question);
    var searchDomainDocument = new DocumentNodeSelector(domainRootUrl);

    // Take() caps the number of links without failing when the search yields fewer
    // than the limit; the previous RemoveRange(max, Count - max) call threw
    // ArgumentOutOfRangeException in that case.
    var documentLinks = searchDomainDocument
        .FindAllLinks()
        .Select(link => SearchDomain.ParseSearchUrl(link))
        .Where(link => link != null)
        .Distinct()
        .Take(SearchSettings.MaxLinks)
        .ToList();

    var originalKeywords = WebSearchHelper.SplitSenteceToKeywords(question);
    var paragraphs = _generateParagraphsFromLinks(documentLinks);
    var scoredKeywords = _createScoredKeywords(paragraphs, originalKeywords);

    return _createScoredStatements(paragraphs, scoredKeywords);
}