/// <summary>
/// Task entry point: builds a <c>SiteCrawler</c> from <c>PublishedWebsiteUrl</c>,
/// this task instance and a wrapper around the task's log, then runs the crawl.
/// </summary>
/// <returns><c>true</c> whenever <c>Crawl</c> returns; no exceptions are caught here.</returns>
public override bool Execute()
{
    // Locals are evaluated in the same order as the constructor arguments they feed.
    var siteUrl = this.PublishedWebsiteUrl;
    var buildLog = new MSBuildLogWrapper(this.Log);

    var siteCrawler = new SiteCrawler(siteUrl, this, buildLog);
    siteCrawler.Crawl();

    return true;
}
/// <summary>
/// Task entry point: crawls <c>PublishedWebsiteUrl</c> with a <c>SiteCrawler</c>
/// that is given this task instance and an <c>MSBuildLogWrapper</c> over the task's log.
/// </summary>
/// <returns><c>true</c> whenever <c>Crawl</c> returns; no exceptions are caught here.</returns>
public override bool Execute()
{
    new SiteCrawler(this.PublishedWebsiteUrl, this, new MSBuildLogWrapper(this.Log)).Crawl();
    return true;
}
/// <summary>
/// Calls <c>GetVideos(2)</c> on an <c>HtmlBasedVideoProvider</c> configured for the
/// Vimeo 360vr channel, writes every returned item to the test output, and expects
/// exactly 12 items.
/// </summary>
public void GetVideoItemsFromPage2Test()
{
    const int page = 2;
    const int expectedItemCount = 12;

    // NOTE(review): the arguments look like a page URL template, an item URL template
    // and three CSS selectors — confirm against HtmlSiteProviderConfigData.
    var vimeoConfig = new HtmlSiteProviderConfigData(
        "https://vimeo.com/channels/360vr/videos/page:{0}/sort:preset",
        "https://vimeo.com{0}",
        "ol.js-browse_list > li > a",
        "ol.js-browse_list > li > a > img",
        "ol.js-browse_list > li > a div.l-ellipsis");

    var videoProvider = new HtmlBasedVideoProvider(
        vimeoConfig,
        new SiteCrawler(vimeoConfig, new HtmlDocumentTransport()));

    var videos = videoProvider.GetVideos(page);

    // Dump each item so a count mismatch can be diagnosed from the test output.
    foreach (var video in videos)
    {
        _output.WriteLine(video.ToString());
    }

    Assert.Equal(expectedItemCount, videos.Count);
}
/// <summary>
/// Calls <c>GetVideos(2)</c> on an <c>HtmlBasedVideoProvider</c> built from the
/// configuration below, writes every returned item to the test output, and expects
/// exactly 44 items.
/// </summary>
public void GetVideoItemsFromPageTest()
{
    const int requestedPage = 2;
    const int expectedCount = 44;

    // NOTE(review): the arguments look like a page URL template, an item URL template
    // and three CSS selectors — confirm against HtmlSiteProviderConfigData.
    var siteConfig = new HtmlSiteProviderConfigData(
        "https://www.pornhub.com/vr?page={0}",
        "https://www.pornhub.com{0}",
        "#videoCategory > li.pcVideoListItem div.phimage > a",
        "#videoCategory > li.pcVideoListItem div.phimage > a > img",
        "#videoCategory > li.pcVideoListItem span.title > a");

    var transport = new HtmlDocumentTransport();
    var siteCrawler = new SiteCrawler(siteConfig, transport);
    var videoProvider = new HtmlBasedVideoProvider(siteConfig, siteCrawler);

    var items = videoProvider.GetVideos(requestedPage);

    // Dump each item so a count mismatch can be diagnosed from the test output.
    foreach (var item in items)
    {
        _output.WriteLine(item.ToString());
    }

    Assert.Equal(expectedCount, items.Count);
}
/// <summary>
/// Console entry point. Parses the command line and then either searches the index
/// (when the <c>q|query</c> switch is set) or crawls the website named by the first
/// positional argument. Prints usage when help is requested or when neither a
/// website nor a query was supplied.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
static void Main(string[] args)
{
    bool optimize = true;
    bool showHelp = false;
    bool queryMode = false;
    string indexFolder = ".totalrecall";
    string website = "";
    string query = "";

    var options = new OptionSet()
    {
        { "o|optimize", "optimize index (default true)", value => { optimize = value != null; } },
        { "q|query", "Query index", value => { queryMode = value != null; } },
        { "h|?|help", "show this screen", value => { showHelp = value != null; } },
        { "i|index=", "Lucene index location (default .totalrecall)", value => { indexFolder = value; } }
    };

    try
    {
        List<string> positional = options.Parse(args);
        if (positional.Count > 0)
        {
            // The first positional argument is the query text in query mode,
            // otherwise it is the website to crawl.
            if (queryMode)
            {
                query = positional[0];
            }
            else
            {
                website = positional[0];
            }
        }
    }
    catch (OptionException e)
    {
        Console.Write("rekall: ");
        Console.WriteLine(e.Message);
        Console.WriteLine("Try `rekall -?' for more information.");
        return;
    }

    bool nothingToDo = string.IsNullOrEmpty(website) && string.IsNullOrEmpty(query);
    if (showHelp || nothingToDo)
    {
        PrintUsage();
        options.WriteOptionDescriptions(Console.Out);
        return;
    }

    if (!queryMode)
    {
        var config = new ConsoleConfig() { IndexFolder = indexFolder, Optimize = optimize };
        var log = new ConsoleLogWrapper();
        var siteCrawler = new SiteCrawler(website, config, log);
        siteCrawler.Crawl();
    }
    else
    {
        var engine = new SearchEngine();
        int rank = 0;

        // NOTE(review): the second argument (9) is presumably a result limit — confirm against SearchEngine.Search.
        foreach (var hit in engine.Search(query, 9))
        {
            rank++;
            Console.WriteLine("{0}. {1}", rank, hit.Title);
            Console.WriteLine(" {0}", hit.Url);
            Console.WriteLine();
        }
    }

#if DEBUG
    // Debug builds wait for a key press before exiting.
    Console.WriteLine();
    Console.Write("Press any key to continue");
    Console.ReadKey();
#endif
}
/// <summary>
/// Console entry point. Parses the command line and then either searches the index
/// (when the <c>q|query</c> switch is set) or crawls the website named by the first
/// positional argument with <c>AdhereToRobotRules</c> set to <c>false</c>. Prints usage
/// when help is requested or when neither a website nor a query was supplied.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
static void Main(string[] args)
{
    bool optimize = true;
    bool helpRequested = false;
    bool runQuery = false;
    string indexFolder = ".totalrecall";
    string website = "";
    string query = "";

    var optionSet = new OptionSet()
    {
        { "o|optimize", "optimize index (default true)", flag => { optimize = flag != null; } },
        { "q|query", "Query index", flag => { runQuery = flag != null; } },
        { "h|?|help", "show this screen", flag => { helpRequested = flag != null; } },
        { "i|index=", "Lucene index location (default .totalrecall)", path => { indexFolder = path; } }
    };

    try
    {
        List<string> remaining = optionSet.Parse(args);

        // Only the first leftover argument is used: as the query text in query mode,
        // otherwise as the website to crawl.
        if (remaining.Count > 0)
        {
            string first = remaining[0];
            if (runQuery)
            {
                query = first;
            }
            else
            {
                website = first;
            }
        }
    }
    catch (OptionException e)
    {
        Console.Write("rekall: ");
        Console.WriteLine(e.Message);
        Console.WriteLine("Try `rekall -?' for more information.");
        return;
    }

    bool hasTarget = !string.IsNullOrEmpty(website) || !string.IsNullOrEmpty(query);
    if (helpRequested || !hasTarget)
    {
        PrintUsage();
        optionSet.WriteOptionDescriptions(Console.Out);
        return;
    }

    if (runQuery)
    {
        var searchEngine = new SearchEngine();
        int position = 0;

        // NOTE(review): the second argument (9) is presumably a result limit — confirm against SearchEngine.Search.
        foreach (var hit in searchEngine.Search(query, 9))
        {
            position++;
            Console.WriteLine("{0}. {1}", position, hit.Title);
            Console.WriteLine(" {0}", hit.Url);
            Console.WriteLine();
        }
    }
    else
    {
        var config = new ConsoleConfig() { IndexFolder = indexFolder, Optimize = optimize };

        // NOTE(review): AdhereToRobotRules = false presumably makes the crawler ignore robots.txt — confirm in SiteCrawler.
        var siteCrawler = new SiteCrawler(website, config, new ConsoleLogWrapper())
        {
            AdhereToRobotRules = false
        };
        siteCrawler.Crawl();
    }

#if DEBUG
    // Debug builds wait for a key press before exiting.
    Console.WriteLine();
    Console.Write("Press any key to continue");
    Console.ReadKey();
#endif
}