/// <summary>
/// Builds an <see cref="HtmlCollectorStrategyBase"/> populated with fixed keywords, search terms,
/// and a <see cref="CraigslistSearcher"/> whose document node is taken from the mocked HTML in
/// <c>TestData.SearchResultsDocument</c>.
/// </summary>
/// <returns>
/// The configured strategy. Its base URLs are the first two non-empty lines of
/// <c>TestData.SearchUrls</c>.
/// </returns>
public override HtmlCollectorStrategyBase GetStrategy()
 {
     var result = new HtmlCollectorStrategyBase();
     result.ScoredKeywords = new Dictionary<string, int>
     {
         { "C#", 20 },
         { "ASP", 20 }
     };
     // Split on every '\r' and '\n' character; RemoveEmptyEntries drops the empty
     // pieces produced between a "\r\n" pair and by blank lines.
     result.SearchBaseUrls =
         TestData.SearchUrls.Split(System.Environment.NewLine.ToCharArray(),
             StringSplitOptions.RemoveEmptyEntries).Take(2).ToList();
     result.SearchTerms = new List<string> { "C#", "ASP" };
     result.Searcher = new CraigslistSearcher();
     result.Searcher.SearcherConfiguration = new SearcherConfiguration
     {
         // NOTE(review): "query+=" looks like it may be a typo for "query=" - confirm against the searcher.
         SearchQueryFormatExpression = "{0}/search/sof?query+={1}",
         SearchResultKeyAttributeName = "data-pid",
         SearchResultsCityGroupRegex = "(?<city>[\\w. ]+)",
         SearchResultSourceUriLinkSelector = ".pl a",
         // NOTE(review): ['data-pid'] is a string-literal predicate, not an attribute test
         // (@data-pid), so it presumably matches every <p> element - confirm intent.
         SearchResultsParentNodeXPathSelector = "//p['data-pid']",
         // Named group must be written (?<name>...); the previous "(<?stateprovince>" form
         // matched an optional '<' plus the literal text "stateprovince>" instead.
         SearchResultsStateProvinceGroupRegex = "(?<stateprovince>[\\w. ]+)",
         SearchResultTitleLinkSelector = ".pl a"
     };
     // Mocked document: parse the canned HTML and hand its root node to the searcher.
     var mockDoc = new HtmlDocument();
     mockDoc.OptionOutputAsXml = true;
     mockDoc.LoadHtml(TestData.SearchResultsDocument);
     result.Searcher.DocumentNode = mockDoc.DocumentNode;
     return result;
 }
Example #2
0
        /// <summary>
        /// Configures an <see cref="HtmlCollectorStrategyBase"/> for <paramref name="searcher"/>,
        /// runs its collect and score steps, persists the results, and returns them.
        /// </summary>
        /// <param name="urlFilePath">Path of a text file holding one search base URL per line; blank lines are ignored.</param>
        /// <param name="searchTerms">Search terms assigned to the strategy.</param>
        /// <param name="scoredKeywords">Keyword-to-score map assigned to the strategy.</param>
        /// <param name="searcher">Searcher to use; a <see cref="CraigslistSearcher"/> is initialized first.</param>
        /// <returns>The strategy's <c>SearchResults</c> after scoring.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="searcher"/> is null.</exception>
        public List<SearchResult> GetSearchResults(string urlFilePath, List<string> searchTerms, Dictionary<string, int> scoredKeywords, SearcherBase searcher)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException below.
            if (searcher == null)
            {
                throw new System.ArgumentNullException("searcher");
            }

            var collectorStrategy = new HtmlCollectorStrategyBase();
            var searcherType = searcher.GetType();
            // Kept as a local string so the query below compares against a plain value.
            var searchResultType = searcherType.ToString();

            // URLs already stored for this post type are handed to the strategy as ExcludeUrls
            // (presumably so they are not collected again - verify in the strategy).
            using (var ctx = new SearcherEntities())
            {
                collectorStrategy.ExcludeUrls = ctx.SearchResultDatas
                    .Where(x => x.PostType == searchResultType)
                    .Select(x => x.PostUrl)
                    .ToList();
            }

            collectorStrategy.ScoredKeywords = scoredKeywords;
            // Drop blank lines so they do not become empty base URLs.
            collectorStrategy.SearchBaseUrls = System.IO.File.ReadAllLines(urlFilePath)
                .Where(line => !string.IsNullOrWhiteSpace(line))
                .ToList();
            collectorStrategy.SearchTerms = searchTerms;

            // Exact-type match on purpose: subclasses of CraigslistSearcher are not initialized here.
            if (searcherType == typeof(CraigslistSearcher))
            {
                InitializeCraigslistSearcher(searcher);
            }

            collectorStrategy.Searcher = searcher;
            collectorStrategy.CollectSearchQueries();
            collectorStrategy.CollectSearches();
            collectorStrategy.ScoreResults();
            PersistSearchResults(collectorStrategy.SearchResults, searchResultType);
            return collectorStrategy.SearchResults;
        }