/// <summary>
/// Builds a pre-configured <see cref="HtmlCollectorStrategyBase"/> backed by canned data:
/// two scored keywords, the first two non-empty lines of <c>TestData.SearchUrls</c> as base URLs,
/// and a <see cref="CraigslistSearcher"/> whose document node is loaded from
/// <c>TestData.SearchResultsDocument</c>.
/// </summary>
/// <returns>The configured strategy instance.</returns>
public override HtmlCollectorStrategyBase GetStrategy()
        {
            var result = new HtmlCollectorStrategyBase();

            result.ScoredKeywords = new Dictionary<string, int>();
            result.ScoredKeywords.Add("C#", 20);
            result.ScoredKeywords.Add("ASP", 20);

            // Split on every newline character and drop empty entries, then keep only the first two URLs.
            result.SearchBaseUrls =
                TestData.SearchUrls.Split(System.Environment.NewLine.ToCharArray(),
                                          StringSplitOptions.RemoveEmptyEntries).Take(2).ToList();
            result.SearchTerms = new List<string> {
                "C#", "ASP"
            };

            result.Searcher = new CraigslistSearcher();
            result.Searcher.SearcherConfiguration = new SearcherConfiguration
            {
                // NOTE(review): "query+=" looks unusual for a query-string key - confirm it is intended.
                SearchQueryFormatExpression          = "{0}/search/sof?query+={1}",
                SearchResultKeyAttributeName         = "data-pid",
                SearchResultsCityGroupRegex          = "(?<city>[\\w. ]+)",
                SearchResultSourceUriLinkSelector    = ".pl a",
                // NOTE(review): the predicate is a string literal, which XPath treats as always true,
                // so this selects every <p>; "//p[@data-pid]" may have been intended - confirm.
                SearchResultsParentNodeXPathSelector = "//p['data-pid']",
                // Named-group syntax is (?<name>...); the previous "(<?stateprovince>" form
                // declared no named group and matched the literal text "stateprovince>" instead.
                SearchResultsStateProvinceGroupRegex = "(?<stateprovince>[\\w. ]+)",
                SearchResultTitleLinkSelector        = ".pl a"
            };

            // Mocked document: the searcher is handed a pre-loaded node built from TestData.
            var mockDoc = new HtmlDocument();

            mockDoc.OptionOutputAsXml = true;
            mockDoc.LoadHtml(TestData.SearchResultsDocument);
            result.Searcher.DocumentNode = mockDoc.DocumentNode;
            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Runs a full collect-and-score pass for the given searcher and persists the results.
        /// </summary>
        /// <param name="urlFilePath">Path of a text file whose lines are used as the search base URLs.</param>
        /// <param name="searchTerms">Terms assigned to the strategy's <c>SearchTerms</c>.</param>
        /// <param name="scoredKeywords">Keyword-to-score map assigned to the strategy's <c>ScoredKeywords</c>.</param>
        /// <param name="searcher">Searcher to use; its runtime type name is used as the post type.</param>
        /// <returns>The strategy's <c>SearchResults</c> after collection and scoring.</returns>
        public List<SearchResult> GetSearchResults(string urlFilePath, List<string> searchTerms, Dictionary<string, int> scoredKeywords, SearcherBase searcher)
        {
            var strategy     = new HtmlCollectorStrategyBase();
            var searcherType = searcher.GetType();
            var postType     = searcherType.ToString();

            // Seed ExcludeUrls with the PostUrl of every stored row whose PostType matches this searcher type.
            using (var db = new SearcherEntities())
            {
                var storedUrls = db.SearchResultDatas
                                   .Where(row => row.PostType == postType)
                                   .Select(row => row.PostUrl);
                strategy.ExcludeUrls = storedUrls.ToList();
            }

            strategy.ScoredKeywords = scoredKeywords;
            strategy.SearchBaseUrls = System.IO.File.ReadAllLines(urlFilePath).ToList();
            strategy.SearchTerms    = searchTerms;

            // Exact type match only: the Craigslist initialization is not applied to other searcher types.
            if (searcherType == typeof(CraigslistSearcher))
            {
                InitializeCraigslistSearcher(searcher);
            }
            strategy.Searcher = searcher;

            // Pipeline: build queries, run the searches, then score what was collected.
            strategy.CollectSearchQueries();
            strategy.CollectSearches();
            strategy.ScoreResults();

            PersistSearchResults(strategy.SearchResults, postType);
            return strategy.SearchResults;
        }