/// <summary>
/// Requests cancellation of the crawl and waits until every task tracked in
/// <c>mTasks</c> has completed.
/// </summary>
/// <remarks>
/// Returns immediately when <c>Status</c> is not <c>CrawlerStatus.RUNNING</c>.
/// Any exception raised by the tracked tasks still propagates to the caller
/// (as thrown by <see cref="Task.WaitAll(Task[])"/>), but the status is moved to
/// <c>STOPPED</c> and the token source is disposed regardless, so the crawler is
/// never left "running" with a disposed cancellation source.
/// </remarks>
/// <returns>A task that completes once all tracked tasks have finished.</returns>
public async Task StopAsync()
{
    lock (mStatusSyncLock)
    {
        if (Status != CrawlerStatus.RUNNING)
        {
            return;
        }
    }

    mLogger.Log(nameof(Crawler), "Stopping");

    // Capture the source so that the instance cancelled here is the one that
    // gets disposed, even if the field is replaced by a concurrent restart.
    var cancellationSource = mCancellationTokenSource;
    cancellationSource.Cancel();

    await Task.Run(() =>
    {
        try
        {
            Task.WaitAll(mTasks.Keys.ToArray());
        }
        finally
        {
            // WaitAll only throws after every task has completed, so the
            // crawler is effectively stopped on both the success and failure path.
            cancellationSource.Dispose();
            lock (mStatusSyncLock)
            {
                Status = CrawlerStatus.STOPPED;
            }
        }
    });

    mLogger.Log(nameof(Crawler), "Stopped");
}
/// <summary>
/// Applies a textual command to the crawler state.
/// </summary>
/// <param name="argument">
/// One of <c>"GoUp"</c>, <c>"GoDown"</c> or <c>"Stop"</c>; anything else is
/// reported through <c>Echo</c> and otherwise ignored.
/// </param>
void HandleTrigger(string argument)
{
    if (argument == "GoUp")
    {
        // Lock the lower gear, release the upper one, then extend.
        _crawlerStatus = CrawlerStatus.GoUp;
        _lowerLandingGear.Lock();
        _upperLandingGear.Unlock();
        ExtendPistons();
        return;
    }

    if (argument == "GoDown")
    {
        // Mirror image of "GoUp": lock the upper gear, release the lower one.
        _crawlerStatus = CrawlerStatus.GoDown;
        _upperLandingGear.Lock();
        _lowerLandingGear.Unlock();
        ExtendPistons();
        return;
    }

    if (argument == "Stop")
    {
        _crawlerStatus = CrawlerStatus.Stop;
        return;
    }

    Echo($"Invalid option: {argument}\n");
}
/// <summary>
/// Starts the crawl loop and completes when that loop exits.
/// </summary>
/// <remarks>
/// The call is a no-op (and logs nothing) when <c>Status</c> is already
/// <c>CrawlerStatus.RUNNING</c>. The loop pops URLs from the frontier and hands
/// them to <c>FetchUrlAsync</c>; when no URL is available it waits for any
/// tracked task to finish, or idles when nothing is in flight. The idle wait
/// observes the cancellation token so a stop request ends the loop promptly
/// instead of after the full idle interval.
/// </remarks>
/// <param name="restart">
/// When <c>true</c>, the URL frontier is replaced by a fresh
/// <c>SimpleUrlFrontier</c> built from the current configuration.
/// </param>
/// <returns>A task that completes when the crawl loop has exited.</returns>
public async Task StartAsync(bool restart = false)
{
    const int idleDelayMilliseconds = 5000;

    lock (mStatusSyncLock)
    {
        if (Status == CrawlerStatus.RUNNING)
        {
            return;
        }

        Status = CrawlerStatus.RUNNING;
    }

    // Logged only after the guard so a rejected start does not claim "Running".
    mLogger.Log(nameof(Crawler), "Running");

    if (restart)
    {
        mUrlFrontier = new SimpleUrlFrontier(mConfig);
    }

    mTasks.Clear();
    mFetchSemaphore = new SemaphoreSlim(mConfig.MaxFetchingConcurrency);
    mCancellationTokenSource = new CancellationTokenSource();
    var token = mCancellationTokenSource.Token;

    await Task.Run(() =>
    {
        while (!token.IsCancellationRequested)
        {
            string url = null;
            try
            {
                url = mUrlFrontier.PopUrl();
            }
            catch (Exception e)
            {
                mLogger.LogException(nameof(Crawler), "Failed to pop url", e);
                mErrorLogger.LogException(nameof(Crawler), "Failed to pop url", e);
            }

            if (url != null)
            {
                FetchUrlAsync(url);
            }
            else if (mTasks.Keys.Any())
            {
                Task.WaitAny(mTasks.Keys.ToArray());
            }
            else
            {
                try
                {
                    // Cancellable replacement for Thread.Sleep: returns early
                    // (by throwing) as soon as the token is cancelled.
                    Task.Delay(idleDelayMilliseconds, token).Wait();
                }
                catch (AggregateException)
                {
                    // The delay was cancelled; the loop condition ends the loop.
                }
            }
        }
    }, token);
}
/// <summary>
/// Creates a crawler in the <c>CrawlerStatus.Idle</c> state with its own
/// <see cref="WebClient"/>.
/// </summary>
/// <param name="dictionary">Dictionary manager stored for later use by the crawl.</param>
/// <param name="server">Server address stored for later use by the crawl.</param>
/// <param name="concurrent_tasks">Stored in the <c>concurrent_tasks</c> field.</param>
/// <param name="force_crawl">Stored in the <c>force_crawl</c> field.</param>
/// <param name="uppercase">Stored in the <c>uppercase</c> field.</param>
public XtreamCrawler(
    DictionaryManager dictionary,
    string server,
    int concurrent_tasks,
    bool force_crawl,
    bool uppercase)
{
    // Inputs supplied by the caller.
    this.server = server;
    this.dictionary = dictionary;

    // Behaviour switches.
    this.uppercase = uppercase;
    this.force_crawl = force_crawl;
    this.concurrent_tasks = concurrent_tasks;

    // Internal state.
    status = CrawlerStatus.Idle;
    client = new WebClient();
}
/// <summary>
/// Wires up a crawler from its collaborators and leaves it in the
/// <c>CrawlerStatus.STOPPED</c> state.
/// </summary>
/// <param name="config">Crawler configuration; its <c>LogDirectory</c> locates both log files.</param>
/// <param name="urlFrontier">Frontier stored in <c>mUrlFrontier</c>.</param>
/// <param name="fetcher">Fetcher stored in <c>mFetcher</c>.</param>
/// <param name="similarContentManager">Stored in <c>mSimilarContentJudger</c>.</param>
/// <param name="urlFilters">Filters stored in <c>mUrlFilters</c>.</param>
public Crawler(
    CrawlerConfig config,
    IUrlFrontier urlFrontier,
    IFetcher fetcher,
    ISimilarContentManager similarContentManager,
    List<IUrlFilter> urlFilters)
{
    mConfig = config;
    Status = CrawlerStatus.STOPPED;

    mUrlFrontier = urlFrontier;
    mFetcher = fetcher;
    mSimilarContentJudger = similarContentManager;
    mUrlFilters = urlFilters;

    // Two log files live side by side in the configured log directory.
    string runtimeLogPath = Path.Combine(config.LogDirectory, "Crawler.Log");
    mLogger = new RuntimeLogger(runtimeLogPath, true);

    string errorLogPath = Path.Combine(config.LogDirectory, "Crawler Error.Log");
    mErrorLogger = new RuntimeLogger(errorLogPath, false);
}
/// <summary>
/// Maps a crawler status to the Russian display text for that state.
/// </summary>
/// <param name="status">Status to describe.</param>
/// <returns>
/// The display text for <c>Ready</c>, <c>Run</c> or <c>Done</c>; a generic
/// "status undefined" text for any other value.
/// </returns>
public static string ToAppStateName(this CrawlerStatus status)
{
    if (status == CrawlerStatus.Ready)
    {
        return "Не запущен";
    }

    if (status == CrawlerStatus.Run)
    {
        return "Запущен";
    }

    if (status == CrawlerStatus.Done)
    {
        return "Работа выполнена";
    }

    return "Статус не определен";
}
/// <summary>
/// Runs the dictionary crawl in batches of background workers and records the
/// outcome in <c>status</c>.
/// </summary>
/// <remarks>
/// Unless <c>force_crawl</c> is set, the server page is downloaded first and must
/// contain the text "Xtream Codes". Dictionary entries are then processed in
/// batches of at most <c>concurrent_tasks</c> workers; each non-empty worker
/// result is appended to <c>output/tv_channels_N.m3u</c>. Any exception is written
/// to the console and reported as <c>CrawlerStatus.InvalidSite</c>.
/// </remarks>
private void Crawl_Manager()
{
    bool found_result = false;
    progress = 0;
    num_results = 0;

    try
    {
        bool valid_site;
        if (force_crawl)
        {
            valid_site = true;
        }
        else
        {
            string page = client.DownloadString(server.Trim());
            valid_site = page != "" && page.Contains("Xtream Codes");
        }

        if (!valid_site || dictionary.entries.Count == 0)
        {
            status = CrawlerStatus.InvalidSite;
            return;
        }

        // A non-positive batch size would never advance `progress`; fall back to 1.
        int batch_size = concurrent_tasks > 0 ? concurrent_tasks : 1;

        while (progress < dictionary.entries.Count)
        {
            int remaining = dictionary.entries.Count - progress;
            int increment = batch_size < remaining ? batch_size : remaining;

            crawl_tasks = new CrawlTaskData[increment];
            for (int j = 0; j < crawl_tasks.Length; j++)
            {
                string entry = dictionary.entries[progress + j];

                // Empty entries are passed through untouched: indexing [0] on them
                // would throw and abort the whole crawl as "InvalidSite".
                string search_string;
                if (uppercase || string.IsNullOrEmpty(entry))
                {
                    search_string = entry;
                }
                else
                {
                    search_string = char.ToUpper(entry[0]) + entry.Substring(1);
                }

                crawl_tasks[j] = new CrawlTaskData();
                crawl_tasks[j].task.WorkerSupportsCancellation = true;
                crawl_tasks[j].task.WorkerReportsProgress = true;
                crawl_tasks[j].task.DoWork += new DoWorkEventHandler(Crawl_Method);
                var arguments = Tuple.Create<string, int>(search_string, j);
                crawl_tasks[j].task.RunWorkerAsync(arguments);
            }

            WaitWorkers();

            foreach (CrawlTaskData crawl_task in crawl_tasks)
            {
                if (crawl_task.result == string.Empty)
                {
                    continue;
                }

                found_result = true;
                num_results++;

                // CreateDirectory is a no-op when the directory already exists.
                Directory.CreateDirectory("output");

                string output = "output/tv_channels_" + num_results + ".m3u";

                // `using` guarantees the file handle is released even if the write fails.
                using (StreamWriter outputFile = new StreamWriter(output, true))
                {
                    outputFile.WriteLine(crawl_task.result);
                }
            }

            progress += increment;
        }

        status = found_result
            ? CrawlerStatus.CompletedWithResults
            : CrawlerStatus.CompletedWithoutResults;
    }
    catch (Exception ex)
    {
        // NOTE(review): every failure, including file I/O errors, is reported as
        // InvalidSite; kept as-is because callers may depend on that status.
        Console.WriteLine(ex.Message);
        status = CrawlerStatus.InvalidSite;
    }
}
/// <summary>
/// <c>DoWork</c>-style entry point: marks the crawler as working and runs the
/// crawl synchronously on the calling thread.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event arguments; not used.</param>
public void DoCrawl(object sender, DoWorkEventArgs e)
{
    // The status must flip before the crawl starts; Crawl_Manager overwrites it
    // with the final outcome.
    this.status = CrawlerStatus.Working;

    this.Crawl_Manager();
}
/// <summary>
/// Crawls every category in <c>Variables.CategoryQueue</c>, page by page, and
/// stores the products that were parsed without problems.
/// </summary>
/// <remarks>
/// Runs only when <c>Status</c> is <c>CrawlerStatus.Ready</c>. For each product
/// cell the vendor code, name, link, discount flag and price are extracted; the
/// price and discount come from the product's own page. Cells with any parsing
/// problem are reported through <c>Repository.Logger</c> instead of being stored.
/// Categories whose page-count indicator is missing or not a number are skipped.
/// NOTE(review): this is <c>async void</c>, so exceptions (e.g. network failures)
/// cannot be observed by the caller; the signature is kept for compatibility.
/// </remarks>
public async void Run()
{
    if (Status != CrawlerStatus.Ready)
    {
        return;
    }

    const string domainName = "https://www.verkkokauppa.com";
    const string pageSelector = ".page-selector__page-count-indicator";
    const string cellSelector = ".list-product";
    const string linkSelector = ".data__list-product-link";

    // Lower-cased vendor codes already known to the repository.
    HashSet<string> codes = new HashSet<string>(
        Repository.EuroMade.GetAllVendorCodes().Select(x => x.ToLower()));

    status = CrawlerStatus.Run;
    CategoryQueue = Variables.CategoryQueue;

    var config = Configuration.Default.WithDefaultLoader();

    while (CategoryQueue.Any())
    {
        Category currentCategory = CategoryQueue.Dequeue();
        var pageIndex = 1;
        var currentURL = currentCategory.URL.ToString() + pageIndex;
        var document = await BrowsingContext.New(config).OpenAsync(currentURL);

        // QuerySelector yields null when the indicator is absent; treat that the
        // same way as an unparsable page count and skip the category.
        var pagesCountString = document.QuerySelector(pageSelector)
            ?.TextContent
            .Replace("/", string.Empty)
            .Trim();
        if (!int.TryParse(pagesCountString, out int pagesCountValue))
        {
            continue;
        }

        for (pageIndex = 1; pageIndex <= pagesCountValue; pageIndex++)
        {
            currentURL = currentCategory.URL.ToString() + pageIndex;

            // Page 1 was already downloaded to read the page count; reuse it.
            if (pageIndex > 1)
            {
                document = await BrowsingContext.New(config).OpenAsync(currentURL);
            }

            var cells = document.QuerySelectorAll(cellSelector);
            var siteItems = new List<VerkkItem>();

            foreach (var c in cells)
            {
                List<string> problems = new List<string>();
                int vendorCode = -1;
                string name = "error_value";
                Uri url = new Uri("https://www.ERROR_URL.com");
                Decimal price = new decimal(-1);

                // Vendor code.
                var vendorCodeElement = c.QuerySelector(".image__product-id");
                if (vendorCodeElement == null)
                {
                    problems.Add("Артикул не найден.");
                }
                else
                {
                    if (!int.TryParse(vendorCodeElement.TextContent, out int number))
                    {
                        problems.Add("Артикул не является числом.");
                    }

                    vendorCode = number;
                }

                // Product name.
                var nameElement = c.QuerySelector(".list-product-link__name");
                if (nameElement == null)
                {
                    problems.Add("Наименование товара не найдено");
                }
                else
                {
                    name = nameElement.TextContent;
                }

                // Product link; a missing element or a missing href is "not found".
                string partialUrl = c.QuerySelector(linkSelector)?.GetAttribute("href");
                if (partialUrl == null)
                {
                    problems.Add("URL не найдено");
                }
                else
                {
                    try
                    {
                        url = new Uri(domainName + partialUrl);
                    }
                    catch (UriFormatException)
                    {
                        problems.Add("Не удается преобразовать URL");
                    }
                }

                string itemUrl = domainName + partialUrl;

                bool uploaded = vendorCode != -1 && codes.Contains("v" + vendorCode);

                // Discount and price are only available on the product's own page.
                var goodsPage = await BrowsingContext.New(config).OpenAsync(itemUrl);
                bool discount = goodsPage.QuerySelector(".price-tag-discount__amount") != null;

                var priceVerkk = goodsPage
                    .QuerySelector(".price-tag-content__price-tag-price--current .price-tag-price__euros")
                    ?.GetAttribute("content");
                if (priceVerkk == null)
                {
                    problems.Add("Цена не найдена.");
                }
                else
                {
                    bool isConverted = decimal.TryParse(
                        priceVerkk,
                        NumberStyles.AllowDecimalPoint,
                        CultureInfo.InvariantCulture,
                        out decimal priceVal);
                    if (!isConverted)
                    {
                        problems.Add("Цена не является числом.");
                    }

                    price = priceVal;
                }

                if (problems.Any())
                {
                    Repository.Logger.Log(ErrorFrom.Crawler, problems, new Uri(currentURL));
                }
                else
                {
                    siteItems.Add(new VerkkItem
                    {
                        Discount = discount,
                        Name = name,
                        PriceEuro = price,
                        ULR = url,
                        VendorCode = vendorCode,
                        Cathegory = currentCategory.RusName,
                        Uploaded = uploaded
                    });
                }
            }

            if (siteItems.Any())
            {
                Repository.VerkkItems.Insert(siteItems);
            }
        }
    }

    status = CrawlerStatus.Done;
}
/// <summary>
/// Appends the given entry to the <c>_portais</c> collection.
/// </summary>
/// <param name="portal">Entry to add.</param>
protected void AddModule(CrawlerStatus portal) => _portais.Add(portal);
/// <summary>
/// Runs the dictionary crawl in batches of background workers and records the
/// outcome in <c>status</c>.
/// </summary>
/// <remarks>
/// Unless <c>force_crawl</c> is set, the server page is downloaded first and must
/// contain the text "Xtream Codes". Dictionary entries are then processed in
/// batches of at most <c>concurrent_tasks</c> workers; each non-empty worker
/// result is appended to <c>output/tv_channels_N.m3u</c>. Any exception is written
/// to the console and reported as <c>CrawlerStatus.InvalidSite</c>.
/// </remarks>
private void Crawl_Manager()
{
    bool found_result = false;
    progress = 0;
    num_results = 0;

    try
    {
        bool valid_site;
        if (force_crawl)
        {
            valid_site = true;
        }
        else
        {
            string page = client.DownloadString(server.Trim());
            valid_site = page != "" && page.Contains("Xtream Codes");
        }

        if (!valid_site || dictionary.entries.Count == 0)
        {
            status = CrawlerStatus.InvalidSite;
            return;
        }

        // A non-positive batch size would never advance `progress`; fall back to 1.
        int batch_size = concurrent_tasks > 0 ? concurrent_tasks : 1;

        while (progress < dictionary.entries.Count)
        {
            int remaining = dictionary.entries.Count - progress;
            int increment = batch_size < remaining ? batch_size : remaining;

            crawl_tasks = new CrawlTaskData[increment];
            for (int j = 0; j < crawl_tasks.Length; j++)
            {
                string entry = dictionary.entries[progress + j];

                // Empty entries are passed through untouched: indexing [0] on them
                // would throw and abort the whole crawl as "InvalidSite".
                string search_string;
                if (uppercase || string.IsNullOrEmpty(entry))
                {
                    search_string = entry;
                }
                else
                {
                    search_string = char.ToUpper(entry[0]) + entry.Substring(1);
                }

                crawl_tasks[j] = new CrawlTaskData();
                crawl_tasks[j].task.WorkerSupportsCancellation = true;
                crawl_tasks[j].task.WorkerReportsProgress = true;
                crawl_tasks[j].task.DoWork += new DoWorkEventHandler(Crawl_Method);
                var arguments = Tuple.Create<string, int>(search_string, j);
                crawl_tasks[j].task.RunWorkerAsync(arguments);
            }

            WaitWorkers();

            foreach (CrawlTaskData crawl_task in crawl_tasks)
            {
                if (crawl_task.result == string.Empty)
                {
                    continue;
                }

                found_result = true;
                num_results++;

                // CreateDirectory is a no-op when the directory already exists.
                Directory.CreateDirectory("output");

                string output = "output/tv_channels_" + num_results + ".m3u";

                // `using` guarantees the file handle is released even if the write fails.
                using (StreamWriter outputFile = new StreamWriter(output, true))
                {
                    outputFile.WriteLine(crawl_task.result);
                }
            }

            progress += increment;
        }

        status = found_result
            ? CrawlerStatus.CompletedWithResults
            : CrawlerStatus.CompletedWithoutResults;
    }
    catch (Exception ex)
    {
        // NOTE(review): every failure, including file I/O errors, is reported as
        // InvalidSite; kept as-is because callers may depend on that status.
        Console.WriteLine(ex.Message);
        status = CrawlerStatus.InvalidSite;
    }
}
/// <summary>
/// Creates a stream client that shares a tweet queue, a status object and a
/// synchronisation object with its creator.
/// </summary>
/// <param name="unprocessedTweets">Queue stored in <c>UnprocessedTweets</c>.</param>
/// <param name="crawlerStatus">Status object stored in <c>CrawlerStatus</c>.</param>
/// <param name="syncRoot">
/// Object stored in <c>SyncRoot</c>. Passed by reference, but only read here.
/// </param>
public TwitterStreamClient(
    Queue<string> unprocessedTweets,
    CrawlerStatus crawlerStatus,
    ref Object syncRoot)
{
    CrawlerStatus = crawlerStatus;
    SyncRoot = syncRoot;
    UnprocessedTweets = unprocessedTweets;
}
/// <summary>
/// Program entry point: starts the worker threads, waits for the user to press
/// Enter, then signals the workers to stop and joins them one by one.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    CrawlerStatus = new CrawlerStatus() { KeepRunning = true };

    UnprocessedHashtags = new Queue<string>();
    UnprocessedRetweets = new Queue<string>();
    UnprocessedTweets = new Queue<string>();

    // The stream reader is started before the dictionary is loaded.
    StreamReaderThread = new Thread(ReadTweets);
    StreamReaderThread.Start();

    const string separator = "---------------------------------------------------";
    Console.WriteLine("");
    Console.WriteLine(separator);
    Console.WriteLine("Reading dictionary...");
    LoadDictionary();
    Console.WriteLine(Dictionary.Count + " dictionary entries read.");
    Console.WriteLine(separator);
    Console.WriteLine("");

    StreamProcessorThread = new Thread(ProcessTweets);
    StreamProcessorThread.Start();

    StatusMessageThread = new Thread(PrintStatusMessage);
    StatusMessageThread.Start();

    TweetClassifierThread = new Thread(TweetClassifier);
    TweetClassifierThread.Start();

    HashtagRetweetProcessorThread = new Thread(HashtagRetweetProcessor);
    HashtagRetweetProcessorThread.Start();

    // Block until the user presses Enter, then ask every worker to wind down.
    Console.ReadLine();
    CrawlerStatus.KeepRunning = false;

    AnnounceAndJoin(
        TweetClassifierThread,
        "",
        "Stopping Threads.....",
        "",
        "Attempting stop Tweet Classifier....");
    AnnounceAndJoin(StreamReaderThread, "Attempting stop Stream Reader....");
    AnnounceAndJoin(StreamProcessorThread, "Attempting stop Stream Processor....");
    AnnounceAndJoin(StatusMessageThread, "Attempting stop Status Message....");
    AnnounceAndJoin(HashtagRetweetProcessorThread, "Attempting stop Hashtag Retweet Processor....");
}

/// <summary>
/// Writes the given lines to the console under the output lock, then waits for
/// the thread to terminate.
/// </summary>
/// <param name="thread">Thread to join.</param>
/// <param name="lines">Lines printed, in order, within a single lock acquisition.</param>
private static void AnnounceAndJoin(Thread thread, params string[] lines)
{
    lock (_syncRootOutput)
    {
        foreach (string line in lines)
        {
            Console.WriteLine(line);
        }
    }

    thread.Join();
}
/// <summary>
/// Looks up the landing gears and pistons by block name, validates that all of
/// them exist, and puts the crawler into its initial stopped state.
/// </summary>
/// <remarks>
/// When any block is missing, each missing block is reported through
/// <c>Echo</c> and initialisation is abandoned: the update frequency is not set
/// and <c>_init</c> stays unset. Otherwise both crawler gears are locked with
/// auto-lock disabled and the script is scheduled via
/// <c>UpdateFrequency.Update10</c>.
/// </remarks>
public Program()
{
    // Each field is bound to the block of the matching name. Previously the
    // upper/lower gears were looked up under each other's names, which
    // contradicted the "top"/"bottom" error messages below.
    _upperLandingGear = GridTerminalSystem.GetBlockWithName("Upper Landing Gear") as IMyLandingGear;
    _lowerLandingGear = GridTerminalSystem.GetBlockWithName("Lower Landing Gear") as IMyLandingGear;
    _portBaseLandingGear = GridTerminalSystem.GetBlockWithName("Port Base Landing Gear") as IMyLandingGear;
    _starboardBaseLandingGear = GridTerminalSystem.GetBlockWithName("Starboard Base Landing Gear") as IMyLandingGear;
    _piston1 = GridTerminalSystem.GetBlockWithName("Piston 1") as IMyExtendedPistonBase;
    _piston2 = GridTerminalSystem.GetBlockWithName("Piston 2") as IMyExtendedPistonBase;
    _piston3 = GridTerminalSystem.GetBlockWithName("Piston 3") as IMyExtendedPistonBase;

    // Report every missing block, not just the first one.
    var error = false;
    if (_upperLandingGear == null)
    {
        Echo("No top landing gear");
        error = true;
    }
    if (_lowerLandingGear == null)
    {
        Echo("No bottom landing gear");
        error = true;
    }
    if (_portBaseLandingGear == null)
    {
        Echo("No port landing gear");
        error = true;
    }
    if (_starboardBaseLandingGear == null)
    {
        Echo("No starboard landing gear");
        error = true;
    }
    if (_piston1 == null)
    {
        Echo("No piston #1");
        error = true;
    }
    if (_piston2 == null)
    {
        Echo("No piston #2");
        error = true;
    }
    if (_piston3 == null)
    {
        Echo("No piston #3");
        error = true;
    }

    if (error)
    {
        Echo("Errors found. Please fix and recompile");
        return;
    }

    Runtime.UpdateFrequency = UpdateFrequency.Update10;

    // Start with both crawler gears locked and under manual control.
    _upperLandingGear.AutoLock = false;
    _upperLandingGear.Lock();
    _lowerLandingGear.AutoLock = false;
    _lowerLandingGear.Lock();

    _portBaseLandingGear.AutoLock = false;
    _starboardBaseLandingGear.AutoLock = false;

    _crawlerStatus = CrawlerStatus.Stop;
    _init = true;
    Echo($"Crawler startup @ {DateTime.Now}");
}