Пример #1
0
        /// <summary>
        /// Requests the page at <paramref name="url"/>, collects every link that matches the
        /// "fileThumb" anchor pattern and downloads each file into the folder currently
        /// selected in <c>UiManager.FolderBrowser</c>.
        /// </summary>
        /// <param name="url">Address of the page whose HTML is scanned for file links.</param>
        /// <returns>A task that completes once every matched link has been handled.</returns>
        public async Task Run(string url)
        {
            var pageHtml = await RequestManager.Request(url);
            if (pageHtml == null)
            {
                // Nothing was returned for the page, so there is nothing to scan.
                return;
            }

            MatchCollection thumbLinks = Regex.Matches(pageHtml, @"class=""fileThumb"" href=""(.*?)"" target");
            DownloadManager.InitDownload(thumbLinks.Count);

            for (var index = 0; index < thumbLinks.Count; index++)
            {
                // The scheme is prepended to the captured href (presumably protocol-relative - confirm).
                var sourceUrl  = "https:" + thumbLinks[index].Groups[1].Value;
                var extension  = CrawlerManager.GetFileExt(sourceUrl);
                var baseName   = Regex.Match(sourceUrl, @"http.*\/\/i\.4cdn\.org\/.*\/(.*)\.").Groups[1].Value;
                var targetPath = string.Concat(UiManager.FolderBrowser.SelectedPath, "/", baseName, extension);

                // NOTE(review): files that already exist are skipped without calling
                // ChangeProgress - confirm that the progress display copes with that.
                if (!DownloadManager.IsFileAlreadyDownloaded(targetPath))
                {
                    await DownloadManager.DownloadFile(sourceUrl, targetPath);

                    DownloadManager.ChangeProgress();
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Console entry point: reads the command-line switches, optionally starts the crawler
        /// and/or the web host, blocks until shutdown and then stops whatever was started.
        /// </summary>
        /// <param name="args">
        /// Command-line switches: <c>-c</c> enables the crawler, <c>-f</c> enables the web host,
        /// <c>-h</c> prints the usage text. When <c>-h</c> is present, or neither <c>-c</c> nor
        /// <c>-f</c> is given, only the usage text is printed and the method returns.
        /// </param>
        private static void RunAsConsole(string[] args)
        {
            Console.WriteLine("Starting...");
            Console.CancelKeyPress += OnCancelKeyPress;

            var enableCrawler = args.Contains("-c");
            var enableFeed    = args.Contains("-f");

            if (args.Contains("-h") || (!enableCrawler && !enableFeed))
            {
                Console.WriteLine("{0}TelegramToRss. Usage:{0}" +
                                  " -c\tEnable crawler: retrieve new posts and save them to DB.{0}" +
                                  " -f\tEnable web-API: serve RSS feed and CRUD for channels.{0}" +
                                  " -h\tShow this help.",
                                  Environment.NewLine);
                return;
            }

            CrawlerManager crawlerManager = null;

            if (enableCrawler)
            {
                var config = ConfigurationManager.GetConfiguration();
                crawlerManager = new CrawlerManager(ConfigurationManager.CreateStorage(config),
                                                    ConfigurationManager.CreateCrawlingConfig(config));

                // The crawler runs in the background; only its failure is reported on the console.
                Task.Run(() => crawlerManager
                         .RunAsync(CancellationToken.Token)
                         .ContinueWith(task =>
                         {
                             // Task.Exception is null unless the crawler task faulted; writing a
                             // null value would only emit a stray empty line.
                             if (task.Exception != null)
                             {
                                 Console.WriteLine(task.Exception);
                             }
                         }, CancellationToken.Token),
                         CancellationToken.Token);
            }

            IWebHost webHost = null;

            if (enableFeed)
            {
                webHost = BuildWebHost();

                Console.WriteLine("Started.");
                // Blocks the calling thread until the web host shuts down.
                webHost.Run();
            }
            else
            {
                Console.WriteLine("Started.");
                // Without a web host the method waits on ExitEvent instead.
                ExitEvent.WaitOne();
            }

            Console.WriteLine("Stopping...");

            if (enableCrawler)
            {
                crawlerManager.Stop();
            }

            if (enableFeed)
            {
                // NOTE(review): the same token source is passed here that is used for the crawler;
                // confirm whether it is already cancelled at this point.
                webHost.StopAsync(CancellationToken.Token).GetAwaiter().GetResult();
            }
        }
Пример #3
0
 /// <summary>
 /// Builds the window, stores the shared crawler manager, binds the grid to the manager's
 /// <c>CrawlersRunning</c> collection and calls <c>isButtonsEnabeld(false)</c>.
 /// </summary>
 public MainWindow()
 {
     InitializeComponent();

     var crawlerManager = CrawlerManager.GetInstance();
     manager = crawlerManager;

     // The grid source is cleared first and then pointed at the running-crawler collection.
     dataGrid.ItemsSource = null;
     dataGrid.ItemsSource = crawlerManager.CrawlersRunning;

     isButtonsEnabeld(false);
 }
Пример #4
0
        /// <summary>
        /// Hands the posted link to <c>CrawlerManager.AddToCrawl</c> and returns the result of
        /// <c>CreatedAtAction</c> pointing at <c>GetYouTubeLink</c> for the link's id.
        /// </summary>
        /// <param name="youTubeLink">The link taken from the request.</param>
        /// <returns>The action result produced by <c>CreatedAtAction</c>, carrying the link.</returns>
        public async Task <ActionResult <YouTubeLink> > PostYouTubeLink(YouTubeLink youTubeLink)
        {
            // Storing the link through _context (Links.Add followed by SaveChangesAsync)
            // is currently disabled; the link is only queued for crawling.
            CrawlerManager.AddToCrawl(youTubeLink);

            var routeValues = new { id = youTubeLink.Id };
            return CreatedAtAction(nameof(GetYouTubeLink), routeValues, youTubeLink);
        }
Пример #5
0
        /// <summary>
        /// Job entry point: creates a <c>CrawlerManager</c> for the crawler input and output
        /// folders below the application's base directory and runs it.
        /// </summary>
        /// <param name="context">Job execution context; not read by this method.</param>
        public void Execute(IJobExecutionContext context)
        {
            var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

            // Both folders are formed by plain concatenation onto the base directory.
            var inputFolder  = baseDirectory + "/App_Data/Crawlers/Input/";
            var outputFolder = baseDirectory + "/App_Data/Crawlers/Output/";

            new CrawlerManager(inputFolder, outputFolder).Run();
        }
        /// <summary>
        /// Program entry point: runs every configured crawler manager and writes the graphs
        /// each one produces to the Cosmos DB graph store named in the application settings.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. In non-DEBUG builds the "v" switch routes trace output to the
        /// console and the "t" parameter must carry the GitHub token; in DEBUG builds tracing is
        /// always routed to the console and the token is read from the "GitHubToken" app setting.
        /// </param>
        static void Main(string[] args)
        {
            // NOTE(review): the returned tree is never read in this method; the call is kept in
            // case loading has side effects - confirm.
            TypeTree nodeTypeTree = TypeTree.LoadFromJson("arm-node-type-tree.json");

#if !DEBUG
            if (switchIndex(args, "v") >= 0)
#endif
            {
                Trace.Listeners.Clear();
                Trace.Listeners.Add(new ConsoleTraceListener());
            }

            var token = readParameter(args, "t");
#if !DEBUG
            if (string.IsNullOrEmpty(token))
            {
                Console.WriteLine("GitHub authentication token is required (pass it in using -t switch).");
                return;
            }
#else
            token = ConfigurationManager.AppSettings["GitHubToken"];
#endif

            var crawlerManagers = new CrawlerManager[]
            {
                new AzureQuickstartCrawlerManager(token)
            };

            // Parallel.ForEach returns only after every manager has been processed, so no
            // polling of the loop result is needed before reporting completion.
            Parallel.ForEach <CrawlerManager>(crawlerManagers,
                                              (manager) =>
            {
                // Blocks this worker until the crawl has produced its graphs.
                var graphs  = manager.CrawlAsync().Result;
                var graphDB = new GraphDBClient.CosmosDBClient(ConfigurationManager.AppSettings["CosmosDBEndpoint"],
                                                               ConfigurationManager.AppSettings["CosmosDBAuthKey"]);
                graphDB.Connect().WriteGraphs(ConfigurationManager.AppSettings["GraphDatabase"],
                                              ConfigurationManager.AppSettings["GraphCollection"], graphs);
            });

            Console.WriteLine("Done!");
        }
Пример #7
0
        /// <summary>
        /// Sets up the start/stop commands, the observed stock collection and the real-time
        /// deal crawler subscription, resets the crawled counter and then runs
        /// <c>InitData</c> and <c>Subscribe</c>.
        /// </summary>
        public MonitorViewModel()
        {
            StartCommand = new RelayCommand(OnStart);
            StopCommand  = new RelayCommand(OnStop);

            // The change handler is attached before the collection is stored in the field.
            var stockModels = new ObservableCollection<StockModel>();
            stockModels.CollectionChanged += _stockModels_CollectionChanged;
            _stockModels = stockModels;

            // The crawler lookup may yield null, in which case no update handler is attached.
            _iStockRealTimeDealCrawler = CrawlerManager<IStockRealTimeDealCrawler>.GetCrawler();
            if (_iStockRealTimeDealCrawler != null)
            {
                _iStockRealTimeDealCrawler.StockRealTimeDealUpdatedEven += StockRealTimeDealCrawler_StockRealTimeDealUpdatedEven;
            }

            TotalCrawledCount = 0;

            InitData();
            Subscribe();
        }
Пример #8
0
        /// <summary>
        /// Requests the page at <paramref name="url"/>, collects every link that matches the
        /// "image" anchor pattern and downloads each one into the folder currently selected
        /// in <c>UiManager.FolderBrowser</c>.
        /// </summary>
        /// <param name="url">Address of the page whose HTML is scanned for image links.</param>
        /// <returns>A task that completes once every matched link has been downloaded.</returns>
        public async Task Run(string url)
        {
            var pageHtml = await RequestManager.Request(url);
            if (string.IsNullOrEmpty(pageHtml))
            {
                // Nothing usable was returned for the page.
                return;
            }

            MatchCollection imageLinks = Regex.Matches(pageHtml, @"<a class=""image"" href=""(.*?)"" target=""_blank""");
            DownloadManager.InitDownload(imageLinks.Count);

            foreach (Match link in imageLinks)
            {
                var sourceUrl = link.Groups[1].Value;

                // The file name is the URL's hash code in upper-case hexadecimal.
                // NOTE(review): string hash codes are not guaranteed to be unique or stable
                // between runs - confirm that this naming is intended.
                var baseName   = sourceUrl.GetHashCode().ToString("X");
                var extension  = CrawlerManager.GetFileExt(sourceUrl);
                var targetPath = string.Concat(UiManager.FolderBrowser.SelectedPath, "\\", baseName, extension);

                await DownloadManager.DownloadFile(sourceUrl, targetPath);

                DownloadManager.ChangeProgress();
            }
        }
Пример #9
0
        /// <summary>
        /// Event handler that asks for a target folder and then runs the crawler matching
        /// the site named in the search box (imgur.com, 4chan.org or cyberdrop.me).
        /// The UI is disabled while the crawl runs and is re-enabled afterwards.
        /// </summary>
        /// <param name="sender">Event source; not read by this method.</param>
        /// <param name="e">Event data; not read by this method.</param>
        private async void Start(object sender, RoutedEventArgs e)
        {
            if (UiManager.FolderBrowser.ShowDialog() == System.Windows.Forms.DialogResult.Cancel)
            {
                // The folder dialog was cancelled, so nothing is started.
                return;
            }

            UiManager.DisableUi();

            try
            {
                var address = Search.Text;

                if (address.Contains("imgur.com"))
                {
                    await CrawlerManager.Run(new Imgur(), address);
                }
                else if (address.Contains("4chan.org"))
                {
                    await CrawlerManager.Run(new _4Chan(), address);
                }
                else if (address.Contains("cyberdrop.me"))
                {
                    await CrawlerManager.Run(new Cyberdrop(), address);
                }
            }
            finally
            {
                // Re-enable the UI even when a crawl throws; otherwise it would stay disabled.
                UiManager.EnableUi();
            }
        }
Пример #10
0
        /// <summary>
        /// Starts the crawler infrastructure for this form: opens the DCP service host
        /// (non-DEBUG builds only), starts <c>CrawlerManager</c>, binds the pipeline grid,
        /// sets the window title, shows the monitor windows and opens the monitor service host.
        /// </summary>
        private void InitCrawlHost()
        {
            // Start DCP; it must be started before CrawlerManager is started.
#if !DEBUG
            _dcpHost = DCPManager.Instance.CreateDCPService();
            _dcpHost.Open();
#endif
            CrawlerManager.Start();
            _currentPipeline = CrawlerManager.CrawlerFactory.Pipelines;
            PipeGridView.AutoGenerateColumns = false;
            // The grid shows the Info object of every pipeline.
            PipeGridView.DataSource          = CrawlerManager.CrawlerFactory.Pipelines.Select(model => model.Info).ToArray();
            // The title carries the configured crawler name, falling back to "No_Name".
            this.Text = "Palas Crawler : " + HooLab.Config.Configger.GetConfig("Palas.Crawler.Name", "No_Name");

            // Start the Gecko monitor window (currently disabled).
            //if ((ConfigurationManager.AppSettings["WCFGeckoMode"] ?? "false") == "true")
            //{
            //    GeckoMonitor monitor = new GeckoMonitor();
            //    monitor.Show();
            //}

            // The analysis monitors are shown for every analyze type except "STORM" (compared upper-cased).
            if (AnalyzeType.ToUpper() != "STORM")
            {
                // Start the word-segmentation monitor, only when the "WCFSpitterMode" setting is "true".
                if ((ConfigurationManager.AppSettings["WCFSpitterMode"] ?? "false") == "true")
                {
                    SplitWordMonitor splitWordMonitor = new SplitWordMonitor();
                    splitWordMonitor.Show();
                }
                // Create the analysis listener.

                AnalyzeMatchMonitor analyzeMonitor = new AnalyzeMatchMonitor();
                analyzeMonitor.Show();
            }
            // Start the AllJobMonitor monitoring host.
            _monitorHost = WCFMonitorProxy.CreateServiceHost();
            _monitorHost.Open();
        }
Пример #11
0
 /// <summary>
 /// Creates a crawler that remembers <paramref name="url"/> as its base URL, builds the
 /// <c>Page</c> for that URL and keeps a reference to <paramref name="manager"/>.
 /// </summary>
 /// <param name="url">URL stored as the base URL and used to create the page.</param>
 /// <param name="manager">Crawler manager stored on the instance.</param>
 public Crawler(Uri url, CrawlerManager manager)
 {
     baseURL = url;
     page = new Page(url);

     this.manager = manager;
 }
Пример #12
0
 /// <summary>
 /// Builds the window's components and then stores the crawler manager instance
 /// returned by <c>CrawlerManager.GetInstance()</c>.
 /// </summary>
 public AddCrawler()
 {
     InitializeComponent();
     manager = CrawlerManager.GetInstance();
 }