/// <summary>
/// Builds a collector strategy populated with fixed test values: two scored
/// keywords, the first two entries of <c>TestData.SearchUrls</c>, two search
/// terms, and a <c>CraigslistSearcher</c> whose document node is pre-loaded
/// from <c>TestData.SearchResultsDocument</c>.
/// </summary>
/// <returns>The configured <c>HtmlCollectorStrategyBase</c> instance.</returns>
public override HtmlCollectorStrategyBase GetStrategy()
{
    var result = new HtmlCollectorStrategyBase();

    result.ScoredKeywords = new Dictionary<string, int>
    {
        { "C#", 20 },
        { "ASP", 20 }
    };

    // Split the URL test data on newline characters, drop empty entries,
    // and keep only the first two URLs.
    result.SearchBaseUrls = TestData.SearchUrls
        .Split(System.Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries)
        .Take(2)
        .ToList();
    result.SearchTerms = new List<string> { "C#", "ASP" };

    result.Searcher = new CraigslistSearcher();
    result.Searcher.SearcherConfiguration = new SearcherConfiguration
    {
        // NOTE(review): "query+=" looks unusual for a query-string key; left as-is — confirm.
        SearchQueryFormatExpression = "{0}/search/sof?query+={1}",
        SearchResultKeyAttributeName = "data-pid",
        SearchResultsCityGroupRegex = "(?<city>[\\w. ]+)",
        SearchResultSourceUriLinkSelector = ".pl a",
        // NOTE(review): the predicate ['data-pid'] is a string literal, which XPath
        // evaluates as true for every node, so this selects all <p> elements.
        // "//p[@data-pid]" may have been intended — confirm before changing.
        SearchResultsParentNodeXPathSelector = "//p['data-pid']",
        // Named group uses the "(?<name>...)" form, matching the city pattern above.
        // The previous "(<?stateprovince>...)" had the '?' and '<' transposed, which
        // produced an unnamed group matching the literal text "stateprovince>".
        SearchResultsStateProvinceGroupRegex = "(?<stateprovince>[\\w. ]+)",
        SearchResultTitleLinkSelector = ".pl a"
    };

    // Mocked document: parse the canned search-results HTML and hand its root
    // node to the searcher.
    var mockDoc = new HtmlDocument();
    mockDoc.OptionOutputAsXml = true;
    mockDoc.LoadHtml(TestData.SearchResultsDocument);
    result.Searcher.DocumentNode = mockDoc.DocumentNode;

    return result;
}
/// <summary>
/// Configures an <c>HtmlCollectorStrategyBase</c> from the supplied inputs,
/// runs its query-collection, search-collection and scoring steps, passes the
/// results to <c>PersistSearchResults</c>, and returns them.
/// </summary>
/// <param name="urlFilePath">Path of a text file whose lines are used as the search base URLs.</param>
/// <param name="searchTerms">Search terms assigned to the strategy.</param>
/// <param name="scoredKeywords">Keyword-to-score map assigned to the strategy.</param>
/// <param name="searcher">
/// Searcher assigned to the strategy. Its runtime type name is used as the post
/// type; an instance whose type is exactly <c>CraigslistSearcher</c> is first
/// passed to <c>InitializeCraigslistSearcher</c>.
/// </param>
/// <returns>The strategy's <c>SearchResults</c> after scoring.</returns>
public List<SearchResult> GetSearchResults(
    string urlFilePath,
    List<string> searchTerms,
    Dictionary<string, int> scoredKeywords,
    SearcherBase searcher)
{
    var strategy = new HtmlCollectorStrategyBase();
    var postTypeName = searcher.GetType().ToString();

    // Exclude every URL already stored for this post type.
    using (var entities = new SearcherEntities())
    {
        var storedForType = entities.SearchResultDatas.Where(row => row.PostType == postTypeName);
        var storedUrls = storedForType.Select(row => row.PostUrl);
        strategy.ExcludeUrls = storedUrls.ToList();
    }

    strategy.ScoredKeywords = scoredKeywords;

    var baseUrlLines = System.IO.File.ReadAllLines(urlFilePath);
    strategy.SearchBaseUrls = baseUrlLines.ToList();

    strategy.SearchTerms = searchTerms;

    // Exact type comparison: subclasses of CraigslistSearcher are not initialised here.
    var isCraigslist = typeof(CraigslistSearcher) == searcher.GetType();
    if (isCraigslist)
    {
        InitializeCraigslistSearcher(searcher);
    }

    strategy.Searcher = searcher;

    strategy.CollectSearchQueries();
    strategy.CollectSearches();
    strategy.ScoreResults();

    PersistSearchResults(strategy.SearchResults, postTypeName);

    return strategy.SearchResults;
}