/// <summary>
/// Fetches the page at <paramref name="url"/>, extracts every link carrying the
/// <c>fileThumb</c> class and downloads each linked file into the folder currently
/// selected in <c>UiManager.FolderBrowser</c>.
/// </summary>
/// <param name="url">Address of the page to scan for file links.</param>
/// <returns>A task that completes once every link has been processed.</returns>
public async Task Run(string url)
{
    var pageHtml = await RequestManager.Request(url);
    if (pageHtml == null)
    {
        // Nothing was returned for the page, so there is nothing to scan.
        return;
    }

    var thumbLinks = Regex.Matches(pageHtml, @"class=""fileThumb"" href=""(.*?)"" target");
    DownloadManager.InitDownload(thumbLinks.Count);

    foreach (Match thumbLink in thumbLinks)
    {
        // The captured href has no scheme, so "https:" is prepended.
        var sourceUrl = "https:" + thumbLink.Groups[1].Value;
        var extension = CrawlerManager.GetFileExt(sourceUrl);

        // File name = last path segment of the i.4cdn.org URL, up to its final dot.
        var baseName = Regex.Match(sourceUrl, @"http.*\/\/i\.4cdn\.org\/.*\/(.*)\.").Groups[1].Value;
        var targetPath = UiManager.FolderBrowser.SelectedPath + "/" + baseName + extension;

        // Files reported as already downloaded are skipped; note that a skipped
        // file does not advance the progress counter.
        if (!DownloadManager.IsFileAlreadyDownloaded(targetPath))
        {
            await DownloadManager.DownloadFile(sourceUrl, targetPath);
            DownloadManager.ChangeProgress();
        }
    }
}
/// <summary>
/// Runs the application in console mode. Depending on the switches in
/// <paramref name="args"/> it starts the crawler (<c>-c</c>), the web host
/// (<c>-f</c>), or both, then blocks until shutdown and stops whatever was started.
/// Prints the usage text and returns when <c>-h</c> is present or neither
/// <c>-c</c> nor <c>-f</c> was supplied.
/// </summary>
/// <param name="args">Command-line arguments.</param>
private static void RunAsConsole(string[] args)
{
    Console.WriteLine("Starting...");
    Console.CancelKeyPress += OnCancelKeyPress;

    var enableCrawler = args.Contains("-c");
    var enableFeed = args.Contains("-f");

    var showUsage = args.Contains("-h") || !(enableCrawler || enableFeed);
    if (showUsage)
    {
        const string usageFormat =
            "{0}TelegramToRss. Usage:{0}" +
            " -c\tEnable crawler: retrieve new posts and save them to DB.{0}" +
            " -f\tEnable web-API: serve RSS feed and CRUD for channels.{0}" +
            " -h\tShow this help.";
        Console.WriteLine(usageFormat, Environment.NewLine);
        return;
    }

    CrawlerManager crawlerManager = null;
    if (enableCrawler)
    {
        var config = ConfigurationManager.GetConfiguration();
        var storage = ConfigurationManager.CreateStorage(config);
        var crawlingConfig = ConfigurationManager.CreateCrawlingConfig(config);
        crawlerManager = new CrawlerManager(storage, crawlingConfig);

        // Fire-and-forget: the crawler runs in the background and the continuation
        // writes the task's Exception (which may be null) to the console.
        Task.Run(
            () => crawlerManager
                .RunAsync(CancellationToken.Token)
                .ContinueWith(task => Console.WriteLine(task.Exception), CancellationToken.Token),
            CancellationToken.Token);
    }

    // The web host is built before "Started." is printed, and only when the feed is enabled.
    IWebHost webHost = enableFeed ? BuildWebHost() : null;
    Console.WriteLine("Started.");

    if (enableFeed)
    {
        // Blocks until the web host shuts down.
        webHost.Run();
    }
    else
    {
        // No web host to block on, so wait for the exit event instead.
        ExitEvent.WaitOne();
    }

    Console.WriteLine("Stopping...");

    if (enableCrawler)
    {
        crawlerManager.Stop();
    }

    if (enableFeed)
    {
        webHost.StopAsync(CancellationToken.Token).GetAwaiter().GetResult();
    }
}
/// <summary>
/// Builds the window, obtains the shared <c>CrawlerManager</c> instance and
/// points the data grid at the manager's <c>CrawlersRunning</c> collection.
/// </summary>
public MainWindow()
{
    InitializeComponent();

    manager = CrawlerManager.GetInstance();

    // The grid's source is cleared first and then pointed at the running crawlers.
    dataGrid.ItemsSource = null;
    dataGrid.ItemsSource = manager.CrawlersRunning;

    // NOTE(review): presumably puts the buttons into their disabled state — confirm against isButtonsEnabeld.
    isButtonsEnabeld(false);
}
/// <summary>
/// Hands the submitted link to <c>CrawlerManager.AddToCrawl</c> and answers with a
/// "created at" result that refers to <c>GetYouTubeLink</c> for the link's id.
/// </summary>
/// <param name="youTubeLink">The link supplied by the caller.</param>
/// <returns>The <c>CreatedAtAction</c> result carrying <paramref name="youTubeLink"/>.</returns>
public async Task<ActionResult<YouTubeLink>> PostYouTubeLink(YouTubeLink youTubeLink)
{
    // Direct persistence is currently switched off; the link is only queued for crawling.
    // _context.Links.Add(youTubeLink);
    // await _context.SaveChangesAsync();
    // NOTE(review): with the lines above disabled this async method contains no await.
    CrawlerManager.AddToCrawl(youTubeLink);

    var routeValues = new { id = youTubeLink.Id };
    return CreatedAtAction(nameof(GetYouTubeLink), routeValues, youTubeLink);
}
/// <summary>
/// Job entry point: creates a <c>CrawlerManager</c> for the crawler input and output
/// folders located under the application's base directory and runs it.
/// </summary>
/// <param name="context">Job execution context; not used by this method.</param>
public void Execute(IJobExecutionContext context)
{
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

    // Both folders are addressed by plain concatenation onto the base directory.
    string inputDirectory = baseDirectory + "/App_Data/Crawlers/Input/";
    string outputDirectory = baseDirectory + "/App_Data/Crawlers/Output/";

    new CrawlerManager(inputDirectory, outputDirectory).Run();
}
/// <summary>
/// Program entry point. Runs every configured crawler manager, then writes the
/// graphs each one produces to the graph database configured in the application settings.
/// </summary>
/// <remarks>
/// In non-DEBUG builds console tracing is enabled only when the "v" switch is present
/// and the GitHub token must be supplied through the "t" parameter. In DEBUG builds
/// tracing is always enabled and the token is read from the "GitHubToken" app setting.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
static void Main(string[] args)
{
    // NOTE(review): the loaded tree is never read in this method; the call is kept
    // in case loading has side effects that other code relies on — confirm.
    TypeTree nodeTypeTree = TypeTree.LoadFromJson("arm-node-type-tree.json");

#if !DEBUG
    if (switchIndex(args, "v") >= 0)
#endif
    {
        Trace.Listeners.Clear();
        Trace.Listeners.Add(new ConsoleTraceListener());
    }

    var token = readParameter(args, "t");
#if !DEBUG
    if (string.IsNullOrEmpty(token))
    {
        Console.WriteLine("GitHub authentication token is required (pass it in using -t switch).");
        return;
    }
#else
    token = ConfigurationManager.AppSettings["GitHubToken"];
#endif

    var crawlerManagers = new CrawlerManager[] { new AzureQuickstartCrawlerManager(token) };

    // Parallel.ForEach returns only after every iteration has finished (or throws),
    // and the body never requests Break/Stop, so no completion polling is needed
    // afterwards. The previous "while (!loop.IsCompleted) Thread.Sleep(1000)" loop
    // could never execute its body and has been removed.
    Parallel.ForEach<CrawlerManager>(crawlerManagers, (manager) =>
    {
        // Blocks the worker thread until this manager's crawl has finished.
        var graphs = manager.CrawlAsync().Result;

        var graphDB = new GraphDBClient.CosmosDBClient(
            ConfigurationManager.AppSettings["CosmosDBEndpoint"],
            ConfigurationManager.AppSettings["CosmosDBAuthKey"]);

        graphDB.Connect().WriteGraphs(
            ConfigurationManager.AppSettings["GraphDatabase"],
            ConfigurationManager.AppSettings["GraphCollection"],
            graphs);
    });

    Console.WriteLine("Done!");
}
/// <summary>
/// Sets up the view model: creates the start/stop commands, the stock collection
/// and its change handler, hooks the real-time deal crawler's update event when a
/// crawler is available, resets the crawled counter, then runs
/// <c>InitData</c> and <c>Subscribe</c>.
/// </summary>
public MonitorViewModel()
{
    // Commands
    StartCommand = new RelayCommand(OnStart);
    StopCommand = new RelayCommand(OnStop);

    // Stock collection, observed for changes
    _stockModels = new ObservableCollection<StockModel>();
    _stockModels.CollectionChanged += _stockModels_CollectionChanged;

    // The crawler lookup may yield null; the event is only hooked when it does not.
    _iStockRealTimeDealCrawler = CrawlerManager<IStockRealTimeDealCrawler>.GetCrawler();
    if (_iStockRealTimeDealCrawler != null)
    {
        _iStockRealTimeDealCrawler.StockRealTimeDealUpdatedEven +=
            StockRealTimeDealCrawler_StockRealTimeDealUpdatedEven;
    }

    TotalCrawledCount = 0;

    InitData();
    Subscribe();
}
/// <summary>
/// Fetches the page at <paramref name="url"/>, extracts the href of every
/// <c>&lt;a class="image" ... target="_blank"</c> link and downloads each one into
/// the folder currently selected in <c>UiManager.FolderBrowser</c>.
/// </summary>
/// <param name="url">Address of the page to scan for image links.</param>
/// <returns>A task that completes once every link has been downloaded.</returns>
public async Task Run(string url)
{
    string pageHtml = await RequestManager.Request(url);
    if (string.IsNullOrEmpty(pageHtml))
    {
        // Nothing was returned for the page, so there is nothing to scan.
        return;
    }

    MatchCollection imageLinks = Regex.Matches(pageHtml, @"<a class=""image"" href=""(.*?)"" target=""_blank""");
    DownloadManager.InitDownload(imageLinks.Count);

    foreach (Match imageLink in imageLinks)
    {
        string sourceUrl = imageLink.Groups[1].Value;

        // The file name is the URL's hash code rendered in upper-case hexadecimal.
        // NOTE(review): string.GetHashCode is not guaranteed to be stable across
        // runtimes/processes, so names may differ between runs — confirm this is acceptable.
        string hashedName = sourceUrl.GetHashCode().ToString("X");
        string extension = CrawlerManager.GetFileExt(sourceUrl);
        string targetPath = string.Concat(UiManager.FolderBrowser.SelectedPath, "\\", hashedName, extension);

        await DownloadManager.DownloadFile(sourceUrl, targetPath);
        DownloadManager.ChangeProgress();
    }
}
/// <summary>
/// Click handler that asks the user for a destination folder and then runs the
/// crawler matching the site named in the search box (imgur.com, 4chan.org or
/// cyberdrop.me). The UI is disabled while the crawl runs and is always
/// re-enabled afterwards, even when the crawl throws.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private async void Start(object sender, RoutedEventArgs e)
{
    // The user cancelled the folder dialog: do nothing.
    if (UiManager.FolderBrowser.ShowDialog() == System.Windows.Forms.DialogResult.Cancel)
    {
        return;
    }

    UiManager.DisableUi();
    try
    {
        // Read the search text once so the site check and the crawl use the same value.
        var target = Search.Text;

        if (target.Contains("imgur.com"))
        {
            await CrawlerManager.Run(new Imgur(), target);
        }
        else if (target.Contains("4chan.org"))
        {
            await CrawlerManager.Run(new _4Chan(), target);
        }
        else if (target.Contains("cyberdrop.me"))
        {
            await CrawlerManager.Run(new Cyberdrop(), target);
        }
    }
    finally
    {
        // Without this, an exception from a crawler would leave the UI disabled
        // for the rest of the session. The exception itself still propagates.
        UiManager.EnableUi();
    }
}
/// <summary>
/// Starts the crawl host: opens the DCP service (non-DEBUG builds only), starts
/// <c>CrawlerManager</c>, binds the pipeline grid, sets the window title, shows the
/// analysis monitor windows unless <c>AnalyzeType</c> is "STORM", and finally opens
/// the WCF monitor service host.
/// </summary>
private void InitCrawlHost()
{
    // Start DCP; it has to be started before CrawlerManager is started.
#if !DEBUG
    _dcpHost = DCPManager.Instance.CreateDCPService();
    _dcpHost.Open();
#endif

    CrawlerManager.Start();
    _currentPipeline = CrawlerManager.CrawlerFactory.Pipelines;

    PipeGridView.AutoGenerateColumns = false;
    PipeGridView.DataSource = CrawlerManager.CrawlerFactory.Pipelines
        .Select(pipeline => pipeline.Info)
        .ToArray();

    this.Text = "Palas Crawler : " + HooLab.Config.Configger.GetConfig("Palas.Crawler.Name", "No_Name");

    // Start the Gecko monitor window (currently disabled).
    //if ((ConfigurationManager.AppSettings["WCFGeckoMode"] ?? "false") == "true")
    //{
    //    GeckoMonitor monitor = new GeckoMonitor();
    //    monitor.Show();
    //}

    if (AnalyzeType.ToUpper() != "STORM")
    {
        // Start the word-segmentation monitor when the setting is exactly "true";
        // a missing setting is treated as "false".
        bool splitterMonitorEnabled =
            (ConfigurationManager.AppSettings["WCFSpitterMode"] ?? "false") == "true";
        if (splitterMonitorEnabled)
        {
            new SplitWordMonitor().Show();
        }

        // Create the analysis-match monitor.
        new AnalyzeMatchMonitor().Show();
    }

    // Start the AllJobMonitor monitoring service host.
    _monitorHost = WCFMonitorProxy.CreateServiceHost();
    _monitorHost.Open();
}
/// <summary>
/// Creates a crawler for <paramref name="url"/> that belongs to <paramref name="manager"/>.
/// </summary>
/// <param name="url">Stored as the crawler's base URL and used to create its initial <c>Page</c>.</param>
/// <param name="manager">The manager this crawler keeps a reference to.</param>
public Crawler(Uri url, CrawlerManager manager)
{
    this.manager = manager;

    // The same URL serves as the base URL and as the address of the first page.
    baseURL = url;
    page = new Page(url);
}
/// <summary>
/// Builds the component tree and stores the instance returned by
/// <c>CrawlerManager.GetInstance()</c> for later use.
/// </summary>
public AddCrawler()
{
    InitializeComponent();

    manager = CrawlerManager.GetInstance();
}