/// <summary>
/// Enqueues an item to be scraped. Items are grouped into one queue per domain (the
/// authority part of the URL); the first item seen for a domain creates that queue and
/// starts the task that consumes it.
/// </summary>
/// <param name="data">Item to scrape. Its Url must be an absolute URL and OnDataArrived must be set.</param>
/// <exception cref="ArgumentNullException">If <paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException">If Url is missing or not a valid absolute URL, or OnDataArrived is not provided.</exception>
/// <exception cref="InvalidOperationException">If the queue or its consumer task cannot be registered.</exception>
public void Enqueue(ScraperData data)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    if (string.IsNullOrWhiteSpace(data.Url))
    {
        throw new ArgumentException("URL is required.");
    }

    if (data.OnDataArrived == null)
    {
        throw new ArgumentException("OnDataArrived is required.");
    }

    // Only absolute URIs expose an authority: Uri.Authority throws InvalidOperationException
    // on a relative URI, so relative input is rejected here as an invalid argument.
    Uri uri;
    if (!Uri.TryCreate(data.Url, UriKind.Absolute, out uri))
    {
        // Format explicitly: ArgumentException(string, string) treats the second
        // argument as a parameter name, not as a format argument.
        throw new ArgumentException(string.Format("URL '{0}' is invalid", data.Url));
    }

    // The domain is the dictionary key, so normalise it culture-independently.
    var domain = uri.Authority.ToLowerInvariant();
    var wrapper = ScrapperMapper.ToWrapper(data, domain, uri);

    // Without the lock, parallel Enqueue calls could each create a queue and start a
    // consumer task for the same domain.
    lock (LockerObj)
    {
        ConcurrentQueue<ScraperDataWrapper> existingQueue;
        if (Queues.TryGetValue(domain, out existingQueue))
        {
            existingQueue.Enqueue(wrapper);
            return;
        }

        var newQueue = new ConcurrentQueue<ScraperDataWrapper>();
        newQueue.Enqueue(wrapper);

        if (!Queues.TryAdd(domain, newQueue) && !Queues.ContainsKey(domain))
        {
            throw new InvalidOperationException("Unexpected error when try to create a new Queue for domain " + domain);
        }

        // Start the consumer for the queue that was just created.
        var consumer = Task.Factory.StartNew(() => ConsumeFromQueue(domain, newQueue));

        if (!_queueThreads.TryAdd(domain, consumer) && !_queueThreads.ContainsKey(domain))
        {
            throw new InvalidOperationException("Unexpected error when try to add a task of queue on QueueThreads for domain " + domain);
        }
    }
}
/// <summary>
/// Builds a response for the given scraper item, copying its Url, Proxy, ScraperType and
/// OptionalArguments. Exception and Response are left null for the caller to fill in.
/// </summary>
/// <param name="item">Source item whose values are copied.</param>
/// <returns>A new <c>ScraperDataResponse</c> for <paramref name="item"/>.</returns>
public static ScraperDataResponse ToResponse(ScraperData item)
{
    var response = new ScraperDataResponse();

    // No outcome yet: both result slots start out empty.
    response.Exception = null;
    response.OptionalArguments = item.OptionalArguments;
    response.Response = null;

    response.Url = item.Url;
    response.Proxy = item.Proxy;
    response.ScraperType = item.ScraperType;

    return response;
}
/// <summary>
/// Wraps a scraper item together with its already-parsed URI and domain, carrying over
/// the request settings and all three callbacks.
/// </summary>
/// <param name="item">Source item whose settings and callbacks are copied.</param>
/// <param name="domain">Domain the item belongs to.</param>
/// <param name="uri">Parsed form of the item's URL.</param>
/// <returns>A new <c>ScraperDataWrapper</c> for <paramref name="item"/>.</returns>
public static ScraperDataWrapper ToWrapper(ScraperData item, string domain, Uri uri)
{
    var wrapper = new ScraperDataWrapper();

    // Routing information supplied by the caller.
    wrapper.Domain = domain;
    wrapper.Uri = uri;

    // Request settings taken from the item.
    wrapper.Url = item.Url;
    wrapper.Proxy = item.Proxy;
    wrapper.ScraperType = item.ScraperType;
    wrapper.OptionalArguments = item.OptionalArguments;

    // Callbacks taken from the item.
    wrapper.OnThrownException = item.OnThrownException;
    wrapper.OnDequeue = item.OnDequeue;
    wrapper.OnDataArrived = item.OnDataArrived;

    return wrapper;
}
/// <summary>
/// Removes <paramref name="key"/> from the Running collection, retrying while the removal
/// fails and the attempt counter is below MaxRetryCount. When the last attempt fails, the
/// item's OnThrownException callback (if any) is invoked with a response describing the failure.
/// </summary>
/// <param name="item">Item the running entry belongs to; used to build the failure response.</param>
/// <param name="key">Key of the entry to remove from Running.</param>
/// <param name="retryCount">Attempt counter to start from (0 for a fresh call).</param>
private void RemoveItemFromRunningCollection(ScraperData item, string key, int retryCount = 0)
{
    string dummyValue;

    // Iterative retry: stack depth stays constant regardless of MaxRetryCount.
    for (var attempt = retryCount; ; attempt++)
    {
        if (Running.TryRemove(key, out dummyValue))
        {
            return;
        }

        if (attempt >= MaxRetryCount)
        {
            break;
        }
    }

    // The response is only built once every attempt has failed.
    var response = ScrapperMapper.ToResponse(item);
    response.Exception = new Exception("The scraper data response cannot be deleted from running collection.");
    item.OnThrownException?.Invoke(response);
}
/// <summary>
/// Enqueues every URL from Data.URLS (skipping those containing "aguasantofagasta") as a
/// string scrape, then Data.URLFILE as a binary scrape, starts PrintData on a background
/// task and blocks until _waitHandle is signalled.
/// </summary>
private static void Run()
{
    _wasFinish = false;

    _fireHorse = FireHorseManager.Instance;
    // The value returned by the subscription call is not used in this method.
    _fireHorse.SubscribeToEndProcess(OnFinish);
    _fireHorse.MaxRetryCount = 0;

    _chronometer = Stopwatch.StartNew();

    foreach (var url in Data.URLS)
    {
        if (url.Contains("aguasantofagasta"))
        {
            continue;
        }

        _fireHorse.Enqueue(new ScraperData
        {
            Url = url,
            OnDequeue = OnDequeue,
            OnDataArrived = OnDataArrived,
            OnThrownException = OnException,
            ScraperType = ScraperType.String
        });
        _totalElementsCount++;
    }

    // The binary download is enqueued without touching _totalElementsCount.
    var binaryItem = new ScraperData
    {
        Url = Data.URLFILE,
        OnDequeue = OnDequeue,
        OnDataArrived = OnDataArrived,
        OnThrownException = OnException,
        ScraperType = ScraperType.Binary
    };
    _fireHorse.Enqueue(binaryItem);

    Task.Factory.StartNew(() => PrintData());

    // Block the calling thread until the wait handle is signalled.
    _waitHandle.WaitOne();
}
/// <summary>
/// Scrapes the Lin Alg, Disc Math, Eprog and A &amp; D lecture pages, stores every link that is
/// not yet in the database, and sends each subscribed user a DM embed listing the new
/// exercises and solutions. Does nothing when there are no subscribed users or no new links.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>, so no exception may escape it: everything is wrapped in a
/// try/catch that logs to the console.
/// NOTE(review): new links are saved before the notification is sent, so a failure later in
/// the run means those links are not announced on the next run either.
/// </remarks>
/// <param name="stateInfo">Not used by this method (presumably a timer callback state object; confirm against the caller).</param>
private async void CheckUpdate(Object stateInfo)
{
    try
    {
        using (var ethContext = new EthContext())
        {
            var subbedUsers = ethContext.Users.Where(x => x.Subscribed).ToList();
            if (subbedUsers.Count < 1)
            {
                return;
            }

            // Newly discovered links per lecture.
            ScraperData linAlgFound = new ScraperData() { Exercises = new List<HtmlNode>(), Solutions = new List<HtmlNode>() };
            ScraperData discMathFound = new ScraperData() { Exercises = new List<HtmlNode>(), Solutions = new List<HtmlNode>() };
            ScraperData eprogFound = new ScraperData() { Exercises = new List<HtmlNode>() };
            ScraperData algDatFound = new ScraperData() { Exercises = new List<HtmlNode>(), Solutions = new List<HtmlNode>() };

            // Lin Alg
            string linAlgUrl = "http://igl.ethz.ch/teaching/linear-algebra/la2017/";
            var linAlg = ScraperService.ScrapeLinAlg(linAlgUrl);
            foreach (var algExercise in linAlg.Exercises)
            {
                string href = linAlgUrl + algExercise.Attributes["href"].Value;
                // Not stored yet: remember it for the message and persist it.
                if (!ethContext.LinAlg.Any(x => x.Href == href))
                {
                    linAlgFound.Exercises.Add(algExercise);
                    ethContext.LinAlg.Add(new LinAlg() { Href = href });
                    await ethContext.SaveChangesAsync();
                }
            }
            foreach (var algSolution in linAlg.Solutions)
            {
                string href = linAlgUrl + algSolution.Attributes["href"].Value;
                if (!ethContext.LinAlg.Any(x => x.Href == href))
                {
                    linAlgFound.Solutions.Add(algSolution);
                    ethContext.LinAlg.Add(new LinAlg() { Href = href });
                    await ethContext.SaveChangesAsync();
                }
            }

            // Disc Math
            string discMathUrl = "http://www.crypto.ethz.ch/teaching/lectures/DM17/";
            var discMath = ScraperService.ScrapeDiscMath(discMathUrl);
            foreach (var discMathExercise in discMath.Exercises)
            {
                string href = discMathUrl + discMathExercise.Attributes["href"].Value;
                if (!ethContext.DiscMath.Any(x => x.Href == href))
                {
                    discMathFound.Exercises.Add(discMathExercise);
                    ethContext.DiscMath.Add(new DiscMath() { Href = href });
                    await ethContext.SaveChangesAsync();
                }
            }
            foreach (var discMathSolution in discMath.Solutions)
            {
                string href = discMathUrl + discMathSolution.Attributes["href"].Value;
                if (!ethContext.DiscMath.Any(x => x.Href == href))
                {
                    discMathFound.Solutions.Add(discMathSolution);
                    ethContext.DiscMath.Add(new DiscMath() { Href = href });
                    await ethContext.SaveChangesAsync();
                }
            }

            // Eprog: the href is used as-is, without a base URL prefix.
            var eprog = ScraperService.ScrapeEprog();
            foreach (var eprogExercise in eprog.Exercises)
            {
                string href = eprogExercise.Attributes["href"].Value;
                if (!ethContext.Eprog.Any(x => x.Href == href))
                {
                    eprogFound.Exercises.Add(eprogExercise);
                    ethContext.Eprog.Add(new Eprog() { Href = href });
                    await ethContext.SaveChangesAsync();
                }
            }

            // A & D
            string algDatUrl = "https://www.cadmo.ethz.ch/education/lectures/HS17/DA/";
            var algDat = ScraperService.ScrapeAlgDat(algDatUrl);
            foreach (var algDatExercise in algDat.Exercises)
            {
                string href = algDatUrl + algDatExercise.Attributes["href"].Value;
                if (!ethContext.AlgDat.Any(x => x.Href == href))
                {
                    algDatFound.Exercises.Add(algDatExercise);
                    ethContext.AlgDat.Add(new AlgDat() { Href = href });
                    await ethContext.SaveChangesAsync();
                }
            }
            foreach (var algDatSolution in algDat.Solutions)
            {
                string href = algDatUrl + algDatSolution.Attributes["href"].Value;
                if (!ethContext.AlgDat.Any(x => x.Href == href))
                {
                    algDatFound.Solutions.Add(algDatSolution);
                    ethContext.AlgDat.Add(new AlgDat() { Href = href });
                    await ethContext.SaveChangesAsync();
                }
            }

            // Nothing new anywhere: no message to send.
            if (linAlgFound.Exercises.Count == 0 && linAlgFound.Solutions.Count == 0 &&
                discMathFound.Exercises.Count == 0 && discMathFound.Solutions.Count == 0 &&
                eprogFound.Exercises.Count == 0 &&
                algDatFound.Exercises.Count == 0 && algDatFound.Solutions.Count == 0)
            {
                return;
            }

            var eb = new EmbedBuilder()
            {
                Color = Utility.ETHBlue,
                ThumbnailUrl = Utility.EthLogo
            };
            int foundTypes = 0;

            // Lin Alg field; displayed numbers are the 1-based position in the scraped list.
            if (linAlgFound.Exercises.Count > 0 || linAlgFound.Solutions.Count > 0)
            {
                foundTypes++;
                eb.Title = "⏰ Lin Alg Alert";
                string value = "";
                if (linAlgFound.Exercises.Count > 0)
                {
                    value += "**New Exercise**\n";
                    foreach (var exercise in linAlgFound.Exercises)
                    {
                        int exNumer = linAlg.Exercises.IndexOf(exercise) + 1;
                        value += $"[View Exercise {exNumer}]({linAlgUrl}{exercise.Attributes["href"].Value})\n";
                    }
                }
                if (linAlgFound.Solutions.Count > 0)
                {
                    value += "**New Solution**\n";
                    foreach (var solution in linAlgFound.Solutions)
                    {
                        int solNumer = linAlg.Solutions.IndexOf(solution) + 1;
                        value += $"[View Solution {solNumer}]({linAlgUrl}{solution.Attributes["href"].Value})\n";
                    }
                }
                eb.AddField(x =>
                {
                    x.IsInline = true;
                    x.Name = "Linear Algebra";
                    x.Value = value;
                });
            }

            // Disc Math field; same 1-based numbering as Lin Alg.
            if (discMathFound.Exercises.Count > 0 || discMathFound.Solutions.Count > 0)
            {
                foundTypes++;
                eb.Title = "⏰ Disc Math Alert";
                string value = "";
                if (discMathFound.Exercises.Count > 0)
                {
                    value += "**New Exercise**\n";
                    foreach (var exercise in discMathFound.Exercises)
                    {
                        int exNumer = discMath.Exercises.IndexOf(exercise) + 1;
                        value += $"[View Exercise {exNumer}]({discMathUrl}{exercise.Attributes["href"].Value})\n";
                    }
                }
                if (discMathFound.Solutions.Count > 0)
                {
                    value += "**New Solution**\n";
                    foreach (var solution in discMathFound.Solutions)
                    {
                        int solNumer = discMath.Solutions.IndexOf(solution) + 1;
                        value += $"[View Solution {solNumer}]({discMathUrl}{solution.Attributes["href"].Value})\n";
                    }
                }
                eb.AddField(x =>
                {
                    x.IsInline = true;
                    x.Name = "Discrete Math";
                    x.Value = value;
                });
            }

            // A & D field.
            // NOTE(review): no "+ 1" here, unlike Lin Alg / Disc Math, so numbering starts
            // at 0. Confirm this is intended (e.g. the sheets start at 0).
            if (algDatFound.Exercises.Count > 0 || algDatFound.Solutions.Count > 0)
            {
                foundTypes++;
                eb.Title = "⏰ A & D Alert";
                string value = "";
                if (algDatFound.Exercises.Count > 0)
                {
                    value += "**New Exercise**\n";
                    foreach (var exercise in algDatFound.Exercises)
                    {
                        int exNumer = algDat.Exercises.IndexOf(exercise);
                        value += $"[View Exercise {exNumer}]({algDatUrl}{exercise.Attributes["href"].Value})\n";
                    }
                }
                if (algDatFound.Solutions.Count > 0)
                {
                    value += "**New Solution**\n";
                    foreach (var solution in algDatFound.Solutions)
                    {
                        int solNumer = algDat.Solutions.IndexOf(solution);
                        value += $"[View Solution {solNumer}]({algDatUrl}{solution.Attributes["href"].Value})\n";
                    }
                }
                eb.AddField(x =>
                {
                    x.IsInline = true;
                    x.Name = "A & D";
                    x.Value = value;
                });
            }

            // Eprog field; 0-based numbering as for A & D (see the review note there).
            if (eprogFound.Exercises.Count > 0)
            {
                foundTypes++;
                eb.Title = "⏰ Eprog Alert";
                string value = "";
                if (eprogFound.Exercises.Count > 0)
                {
                    value += "**New Exercise**\n";
                    foreach (var exercise in eprogFound.Exercises)
                    {
                        int exNumer = eprog.Exercises.IndexOf(exercise);
                        value += $"[View Exercise {exNumer}]({exercise.Attributes["href"].Value})\n";
                    }
                }
                eb.AddField(x =>
                {
                    x.IsInline = true;
                    x.Name = "Eprog";
                    x.Value = value;
                });
            }

            // More than one lecture had updates: use the generic title instead.
            if (foundTypes > 1)
            {
                eb.Title = "⏰ Subscriber Alert";
            }

            // Notify every subscribed user. Each send is isolated so that one failing DM
            // does not prevent the remaining users from being notified.
            foreach (var user in subbedUsers)
            {
                try
                {
                    var userToNotify = _client.GetUser(user.UserId);
                    if (userToNotify == null)
                    {
                        continue;
                    }
                    await (await userToNotify.GetOrCreateDMChannelAsync()).SendMessageAsync("", embed: eb);
                }
                catch (Exception sendError)
                {
                    Console.WriteLine(sendError);
                }
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
    }
}