/// <summary>
/// Timer-triggered entry point (schedule "00 */10 * * * *"). Obtains a reddit session (from a token cached
/// in the "oauth" table, or by logging in again), searches r/SeattleWA for seattletimes.com posts from the
/// last day, and runs <c>ProcessPost</c> on each post until the first hidden one is reached.
/// A mail check is started in parallel and awaited before the function returns.
/// </summary>
/// <param name="myTimer">Timer information supplied by the trigger; not read by this method.</param>
/// <param name="log">Logger for progress and failures.</param>
/// <exception cref="Exception">Thrown when no reddit session could be established.</exception>
/// <exception cref="ApplicationException">Thrown after all posts were attempted if any of them failed.</exception>
public static async Task Run([TimerTrigger("00 */10 * * * *")] TimerInfo myTimer, ILogger log)
{
    if (!_hasRunInit)
    {
        Init();
    }

    // One table client serves both tables; the connection string is identical for each.
    CloudTableClient tableClient = CloudStorageAccount.Parse(_storage).CreateCloudTableClient();
    CloudTable oauthTable = tableClient.GetTableReference("oauth");
    CloudTable articleTable = tableClient.GetTableReference("article");

    if (checkTableExists)
    {
        oauthTable.CreateIfNotExists();
        articleTable.CreateIfNotExists();

        // Clear the flag only after both tables were created successfully, so later invocations
        // skip the existence round-trips. (Previously the flag was re-set to true and never cleared.)
        checkTableExists = false;
    }

    RedditOAuth result = (RedditOAuth)oauthTable
        .Execute(TableOperation.Retrieve<RedditOAuth>("reddit", _user))
        .Result;
    //https://blog.maartenballiauw.be/post/2012/10/08/what-partitionkey-and-rowkey-are-for-in-windows-azure-table-storage.html
    //https://www.red-gate.com/simple-talk/cloud/cloud-data/an-introduction-to-windows-azure-table-storage/

    // A missing row leaves result null; a stale row is discarded so a fresh login happens below.
    if (result?.GetNewToken < DateTimeOffset.Now)
    {
        result = null;
        log.LogInformation("need a new token");
    }

    Reddit r = null;
    bool saveToken = false;
    bool tryLogin;
    int tryLoginAttempts = 2;
    do
    {
        tryLoginAttempts--;
        tryLogin = false;
        if (result == null)
        {
            BotWebAgent agent = new BotWebAgent(_user, _pass, _clientId, _secret, "https://www.reddit.com/user/somekindofbot0000/");
            result = new RedditOAuth()
            {
                Token = agent.AccessToken,
                GetNewToken = DateTimeOffset.Now.AddMinutes(57),
                PartitionKey = "reddit",
                RowKey = _user
            };
            r = new Reddit(agent, true);
            saveToken = true;
        }
        else
        {
            try
            {
                r = new Reddit(result.Token);
            }
            catch (AuthenticationException)
            {
                // Cached token rejected: drop it and log in from scratch on the next pass.
                result = null;
                tryLogin = true;
            }
            catch (WebException w) when (w.Status == WebExceptionStatus.ProtocolError
                && (w.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.Unauthorized)
            {
                // HTTP 401 is treated the same as an authentication failure; other web errors propagate.
                result = null;
                tryLogin = true;
            }
        }
    }
    while (tryLogin && tryLoginAttempts > 0);

    if (r == null)
    {
        throw new Exception("couldn't get logged in");
    }

    if (saveToken)
    {
        oauthTable.Execute(TableOperation.InsertOrReplace(result));
        log.LogInformation("saving token");
    }

    NewsBankClient newsBankClient = new NewsBankClient(
        new EnvironmentVariableEZProxySignInUriProvider(),
        EditForNewsbank._credProvider,
        new EnvironmentVariableProductBaseUriProvider(),
        new BasicCanLog(log));

    // Started now and awaited at the end so it overlaps the post processing.
    Task<HttpResponseMessage> checkMailTask = CheckMail(r, log, client);

    //https://www.reddit.com/r/SeattleWA/search?q=%28+site%3Aseattletimes.com+Subreddit%3ASeattleWA+%29&sort=new&t=day
    Listing<Post> posts = r.AdvancedSearch(
        x => x.Subreddit == "SeattleWA" && x.Site == "seattletimes.com",
        Sorting.New,
        TimeSorting.Day);

    bool allPostsSuccess = true;
    using (WaybackClient waybackMachine = new WaybackClient())
    {
        // ProcessPost hides each post it handles, so the first hidden post ends the walk.
        foreach (Post p in posts.TakeWhile(x => !x.IsHidden))
        {
            allPostsSuccess &= await ProcessPost(p, waybackMachine, log, articleTable, newsBankClient);
        }
    }

    if (checkMailTask.Status < TaskStatus.RanToCompletion)
    {
        log.LogInformation("waiting for checkmail");
        await checkMailTask;
    }
    else
    {
        log.LogInformation(checkMailTask.Status.ToString());
        if (checkMailTask.IsFaulted)
        {
            // The task is not awaited on this path, so record the failure instead of leaving it unobserved.
            log.LogError(checkMailTask.Exception, "checkmail failed");
        }
    }

    log.LogInformation($"C# Timer trigger function executed at: {DateTime.Now}");
    if (!allPostsSuccess)
    {
        throw new ApplicationException("Not all Posts were processed successfully");
    }
}
/// <summary>
/// Processes one reddit post: hides it (unless <c>Debug</c>), makes sure an Archive.org snapshot of the
/// linked article exists, comments on the post with the snapshot link, and then either appends a NewsBank
/// link to that comment or stores the article in <paramref name="articleTable"/>.
/// </summary>
/// <param name="p">The post to process.</param>
/// <param name="waybackClient">Client used to look up or create the Archive.org snapshot.</param>
/// <param name="log">Logger for progress and failures.</param>
/// <param name="articleTable">Table that receives articles which could not be given a NewsBank link yet.</param>
/// <param name="newsBankClient">Client used for the NewsBank lookup.</param>
/// <returns>
/// <c>true</c> when every step completed; <c>false</c> when any step threw. Exceptions are logged here and
/// never propagate to the caller.
/// </returns>
private static async Task<bool> ProcessPost(Post p, WaybackClient waybackClient, ILogger log, CloudTable articleTable, NewsBankClient newsBankClient)
{
    // Tracks the article download while nobody has awaited it yet, so the catch block can clean it up.
    Task<HttpResponseMessage> pendingArticleDownload = null;
    try
    {
        if (!Debug)
        {
            p.Hide();
        }

        log.LogInformation(p.Url.ToString());
        // Keep only scheme, host and path of the post URL.
        Uri target = new Uri(p.Url.GetComponents(UriComponents.Host | UriComponents.Path | UriComponents.Scheme, UriFormat.SafeUnescaped));

        // Both requests are started up front so the article download overlaps the archive work.
        Task<AvailableResponse> availabilityLookup = waybackClient.AvailableAsync(target);
        pendingArticleDownload = client.GetAsync(target);

        //TODO Dispose AvailableResponse;
        AvailableResponse availableResponse = await availabilityLookup;

        Uri archivedUrl = null;
        short attempts = 2;
        bool success = false;
        do
        {
            attempts--;
            if (availableResponse?.archived_snapshots?.closest?.available == true)
            {
                archivedUrl = availableResponse.archived_snapshots.closest.url;
                log.LogInformation("using available snapshot.");
                success = true;
            }
            else
            {
                log.LogInformation("creating snapshot.");
                archivedUrl = await waybackClient.SaveAsync(target);

                // Verify the URL returned by the save call actually resolves before it is posted.
                short validationAttempts = 2;
                do
                {
                    validationAttempts--;
                    using (HttpResponseMessage responseCheck = await client.GetAsync(archivedUrl))
                    {
                        if (!responseCheck.IsSuccessStatusCode || responseCheck.StatusCode == HttpStatusCode.NotFound)
                        {
                            log.LogWarning($"404 returned from archive.org using provided response url. \nstatuscode:{responseCheck.StatusCode} \narchiveURL:{archivedUrl}");
                            // Non-blocking pause before the next check (was Thread.Sleep).
                            await Task.Delay(100);
                        }
                        else
                        {
                            log.LogInformation("check returned success.");
                            success = true;
                        }
                    }
                }
                while (validationAttempts > 0 && !success);
            }
        }
        while (attempts > 0 && !success);

        if (!success)
        {
            throw new ApplicationException("Wayback machine wouldn't cache content.");
        }

        string msg = $@"[Archive.org version.]({archivedUrl.ToString()}) :0: ---- ^^You ^^can ^^support ^^Archive.org ^^via [^^(Amazon) ^^(Smile)](https://smile.amazon.com/ch/94-3242767) ^^You ^^can ^^support ^^Seattle ^^Public ^^Library ^^via [^^(Amazon) ^^(Smile)](https://smile.amazon.com/ch/91-1140642) ^^I'm ^^a ^^bot, ^^beep ^^boop [ ^^((fork) ^^(me) ^^(on) ^^(github))](https://github.com/czf/ArchiveBot)";
        log.LogInformation(msg);

        // In Debug mode nothing is posted and the comment stays null.
        Comment comment = null;
        if (!Debug)
        {
            comment = p.Comment(msg);
        }

        // From here on the using block owns the response; the catch block has nothing left to release.
        Task<HttpResponseMessage> articleDownload = pendingArticleDownload;
        pendingArticleDownload = null;

        log.LogInformation("start newsbank");
        using (HttpResponseMessage articleResponse = await articleDownload)
        {
            if (articleResponse == null)
            {
                log.LogInformation("articleResponse is null");
            }

            SeattleTimesArticle seattleTimesArticle = new SeattleTimesArticle(articleResponse);
            if (seattleTimesArticle.PublishDate.Date < DateTime.Now.Date)
            {
                log.LogInformation("article post is at least a day old, will make newsbank edit.");

                // Awaited (previously fire-and-forget) so failures are reflected in the return value
                // and the work is finished before the caller moves on.
                string commentLine = await EditForNewsbank.GetCommentLine(new ArticlePost(seattleTimesArticle, comment), log, newsBankClient);
                if (!String.IsNullOrEmpty(commentLine))
                {
                    EditForNewsbank.EditComment(commentLine, comment);
                    log.LogInformation("article post has been edited.");
                }
                else
                {
                    log.LogInformation("commentline null or empty will store article post");
                    articleTable.Execute(TableOperation.InsertOrReplace(new ArticlePost(seattleTimesArticle, comment)));
                }
            }
            else
            {
                log.LogInformation("will store article post");
                articleTable.Execute(TableOperation.InsertOrReplace(new ArticlePost(seattleTimesArticle, comment)));
            }
        }

        return true;
    }
    catch (Exception e)
    {
        // Exception goes first: with LogError("", e) it was bound as a format argument and never logged.
        log.LogError(e, "ProcessPost failed for {PostUrl}", p?.Url);

        // If the failure happened before the article download was awaited, release its response in the
        // background and mark any fault as observed.
        pendingArticleDownload?.ContinueWith(
            t =>
            {
                if (t.Status == TaskStatus.RanToCompletion)
                {
                    t.Result?.Dispose();
                }
                else if (t.IsFaulted)
                {
                    t.Exception.Handle(inner => true);
                }
            },
            TaskScheduler.Default);

        return false;
    }
}
/// <summary>
/// Searches NewsBank (World News product, Seattle Times web edition articles) for the article by author,
/// quoted headline and date, and builds the markdown line that links to the first result.
/// </summary>
/// <param name="articlePost">Article whose author, headline, date and comment URI are used.</param>
/// <param name="log">Logger for progress and failures.</param>
/// <param name="newsBankClient">Client that performs the search.</param>
/// <returns>
/// The markdown line, or <see cref="string.Empty"/> when the search threw a
/// <see cref="NullReferenceException"/> or produced no first result item.
/// </returns>
/// <remarks>Any other exception from the search is logged and rethrown.</remarks>
internal static async Task<string> GetCommentLine(ArticlePost articlePost, ILogger log, NewsBankClient newsBankClient)
{
    log.LogInformation("GetCommentLine");
    SearchResult searchResult = null;
    try
    {
        searchResult = await newsBankClient.Search(
            new SearchRequest()
            {
                Product = Product.WorldNews,
                Publications = new List<Publication>() { Publication.SeattleTimesWebEditionArticles },
                SearchParameter0 = new SearchParameter()
                {
                    Field = SearchField.Author,
                    Value = articlePost.ArticleAuthor.Replace("/", string.Empty)
                },
                SearchParameter1 = new SearchParameter()
                {
                    Field = SearchField.Headline,
                    Value = $"\"{articlePost.ArticleHeadline}\"",
                    ParameterCompoundOperator = CompoundOperator.AND
                },
                SearchParameter2 = new SearchParameter()
                {
                    Field = SearchField.Date,
                    // NOTE(review): ToShortDateString() depends on the current culture - confirm the format NewsBank expects.
                    Value = articlePost.ArticleDate.ToShortDateString(),
                    ParameterCompoundOperator = CompoundOperator.AND
                }
            });
    }
    catch (NullReferenceException nullRefEx) //not the best option.
    {
        log.LogError("possible no Web edition result, " + articlePost.CommentUri);
        log.LogError(nullRefEx.Message);
        log.LogError(nullRefEx.StackTrace);
        return string.Empty;
    }
    catch (Exception e)
    {
        // Attach the exception so the stack trace is logged along with the message.
        log.LogError(e, e.Message);
        throw;
    }

    // A search that completes without a usable first item is also "no result"; without this guard the
    // dereference below threw outside the try block and bypassed the empty-string convention.
    var resultItemUri = searchResult?.FirstSearchResultItem?.ResultItemUri;
    if (resultItemUri == null)
    {
        log.LogInformation("NewsBank search returned no result item for " + articlePost.CommentUri);
        return string.Empty;
    }

    return $"[NewsBank version]({resultItemUri}) via SPL [^(SPL) ^(account) ^(required)](https://www.spl.org/using-the-library/get-started/get-started-with-a-library-card/library-card-application)";
}
/// <summary>
/// Timer-triggered entry point (schedule "0 0 07 * * *"). Queries the "article" table for stored posts
/// whose article date lies between 27 days ago and today (both exclusive), obtains a reddit session, and
/// tries to append a NewsBank link to each stored post's comment.
/// </summary>
/// <param name="myTimer">Timer information supplied by the trigger; not read by this method.</param>
/// <param name="log">Logger for progress and failures.</param>
/// <exception cref="Exception">Thrown when no reddit session could be established.</exception>
public static async Task Run([TimerTrigger("0 0 07 * * *")] TimerInfo myTimer, ILogger log)
{
    NewsBankClient newsBankClient = new NewsBankClient(
        new EnvironmentVariableEZProxySignInUriProvider(),
        _credProvider,
        new EnvironmentVariableProductBaseUriProvider(),
        new BasicCanLog(log));

    // One table client serves both tables; the connection string is identical for each.
    CloudTableClient tableClient = CloudStorageAccount.Parse(_storage).CreateCloudTableClient();
    CloudTable articleTable = tableClient.GetTableReference("article");
    CloudTable oauthTable = tableClient.GetTableReference("oauth");

    if (checkTableExists)
    {
        articleTable.CreateIfNotExists();
        oauthTable.CreateIfNotExists();

        // Cleared only after both creations succeeded, so a failure here is retried on the next run.
        checkTableExists = false;
    }

    // NOTE(review): this takes the local calendar date and labels it UTC - confirm that is intended.
    DateTime today = new DateTime(DateTime.Today.Ticks, DateTimeKind.Utc);
    TableQuery<ArticlePost> articlesPublishedBeforeToday = articleTable
        .CreateQuery<ArticlePost>()
        .Where(x => x.ArticleDate < today && x.ArticleDate > today.AddDays(-27))
        .AsTableQuery();

    RedditOAuth result = (RedditOAuth)oauthTable
        .Execute(TableOperation.Retrieve<RedditOAuth>("reddit", _user))
        .Result;
    //https://blog.maartenballiauw.be/post/2012/10/08/what-partitionkey-and-rowkey-are-for-in-windows-azure-table-storage.html
    //https://www.red-gate.com/simple-talk/cloud/cloud-data/an-introduction-to-windows-azure-table-storage/

    // A missing row leaves result null; a stale row is discarded so a fresh login happens below.
    if (result?.GetNewToken < DateTimeOffset.Now)
    {
        result = null;
        log.LogInformation("need a new token");
    }

    Reddit r = null;
    bool saveToken = false;
    bool tryLogin;
    int tryLoginAttempts = 2;
    do
    {
        tryLoginAttempts--;
        tryLogin = false;
        if (result == null)
        {
            BotWebAgent agent = new BotWebAgent(_user, _pass, _clientId, _secret, "https://www.reddit.com/user/somekindofbot0000/");
            result = new RedditOAuth()
            {
                Token = agent.AccessToken,
                GetNewToken = DateTimeOffset.Now.AddMinutes(57),
                PartitionKey = "reddit",
                RowKey = _user
            };
            r = new Reddit(agent, true);
            saveToken = true;
        }
        else
        {
            try
            {
                r = new Reddit(result.Token);
            }
            catch (AuthenticationException)
            {
                // Cached token rejected: drop it and log in from scratch on the next pass.
                result = null;
                tryLogin = true;
            }
            catch (WebException w) when (w.Status == WebExceptionStatus.ProtocolError
                && (w.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.Unauthorized)
            {
                // HTTP 401 is treated the same as an authentication failure; other web errors propagate.
                result = null;
                tryLogin = true;
            }
        }
    }
    while (tryLogin && tryLoginAttempts > 0);

    if (r == null)
    {
        throw new Exception("couldn't get logged in");
    }

    if (saveToken)
    {
        oauthTable.Execute(TableOperation.InsertOrReplace(result));
        log.LogInformation("saving token");
    }

    // Each call runs until its first await and then continues concurrently with the others.
    List<Task> updateCommentTasks = new List<Task>();
    foreach (ArticlePost ap in articlesPublishedBeforeToday)
    {
        updateCommentTasks.Add(UpdateCommentWithNewsBankLine(ap, r, articleTable, log, newsBankClient));
    }

    await Task.WhenAll(updateCommentTasks);
    log.LogInformation("AwaitWhenALL " + updateCommentTasks.Count.ToString());
}

/// <summary>
/// Looks up the NewsBank line for one stored article. When a line is found, the reddit comment is edited
/// and the stored row deleted; otherwise the article data is refreshed and, if that reports a retry,
/// the lookup is repeated.
/// </summary>
private static async Task UpdateCommentWithNewsBankLine(ArticlePost ap, Reddit r, CloudTable articleTable, ILogger log, NewsBankClient newsBankClient)
{
    bool retry;
    do
    {
        // Queried inside the loop: the earlier code reused the first (empty) result on every retry,
        // so a retry could never reach the edit branch.
        string commentLine = await GetCommentLine(ap, log, newsBankClient);
        if (!String.IsNullOrEmpty(commentLine))
        {
            Comment c = r.GetComment(new Uri("https://www.reddit.com" + ap.CommentUri));
            EditComment(commentLine, c);
            articleTable.Execute(TableOperation.Delete(ap));
            retry = false;
        }
        else
        {
            log.LogInformation("Empty CommentLine, will check headline.");
            // NOTE(review): presumably returns true only when it changed the stored author/headline,
            // which is what lets this loop terminate - confirm against TryUpdateArticleData.
            retry = await TryUpdateArticleData(ap, r, articleTable, log);
            if (retry)
            {
                log.LogInformation($"author: {ap.ArticleAuthor} ---- headline: {ap.ArticleHeadline}");
            }
        }
    }
    while (retry);
}
/// <summary>
/// Processes one reddit post: hides it (unless <c>Debug</c>), makes sure an Archive.org snapshot of the
/// linked article exists, comments on the post with the snapshot link, and then either appends a NewsBank
/// link to that comment or stores the article in <paramref name="articleTable"/>.
/// </summary>
/// <param name="p">The post to process.</param>
/// <param name="waybackClient">Client used to look up or create the Archive.org snapshot.</param>
/// <param name="log">Trace writer for progress and failures.</param>
/// <param name="articleTable">Table that receives articles which could not be given a NewsBank link yet.</param>
/// <param name="newsBankClient">Client used for the NewsBank lookup.</param>
/// <remarks>Any exception is logged and then rethrown to the caller.</remarks>
private static async Task ProcessPost(Post p, WaybackClient waybackClient, TraceWriter log, CloudTable articleTable, NewsBankClient newsBankClient)
{
    // Tracks the article download while nobody has awaited it yet, so the catch block can clean it up.
    Task<HttpResponseMessage> pendingArticleDownload = null;
    try
    {
        if (!Debug)
        {
            p.Hide();
        }

        log.Info(p.Url.ToString());
        // Keep only scheme, host and path of the post URL.
        Uri target = new Uri(p.Url.GetComponents(UriComponents.Host | UriComponents.Path | UriComponents.Scheme, UriFormat.SafeUnescaped));

        // Both requests are started up front so the article download overlaps the archive work.
        Task<AvailableResponse> availabilityLookup = waybackClient.AvailableAsync(target);
        pendingArticleDownload = client.GetAsync(target);

        //TODO Dispose AvailableResponse;
        AvailableResponse availableResponse = await availabilityLookup;

        Uri archivedUrl = null;
        int attempts = 2;
        bool success = false;
        do
        {
            attempts--;
            if (availableResponse?.archived_snapshots?.closest?.available == true)
            {
                archivedUrl = availableResponse.archived_snapshots.closest.url;
                log.Info("using available snapshot.");
                success = true;
            }
            else
            {
                log.Info("creating snapshot.");
                archivedUrl = await waybackClient.SaveAsync(target);

                // Verify the URL returned by the save call actually resolves before it is posted.
                using (HttpResponseMessage responseCheck = await client.GetAsync(archivedUrl))
                {
                    if (!responseCheck.IsSuccessStatusCode || responseCheck.StatusCode == HttpStatusCode.NotFound)
                    {
                        log.Warning($"404 returned from archive.org using provided response url. \nstatuscode:{responseCheck.StatusCode} \narchiveURL:{archivedUrl}");
                    }
                    else
                    {
                        log.Info("check returned success.");
                        success = true;
                    }
                }
            }
        }
        while (attempts > 0 && !success);

        if (!success)
        {
            throw new ApplicationException("Wayback machine wouldn't cache content.");
        }

        string msg = $@"[Archive.org version.]({archivedUrl.ToString()}) :0: ---- ^^I'm ^^a ^^bot, ^^beep ^^boop";
        log.Info(msg);

        // In Debug mode nothing is posted and the comment stays null.
        Comment comment = null;
        if (!Debug)
        {
            comment = p.Comment(msg);
        }

        // From here on the using block owns the response; the catch block has nothing left to release.
        Task<HttpResponseMessage> articleDownload = pendingArticleDownload;
        pendingArticleDownload = null;

        using (HttpResponseMessage articleResponse = await articleDownload)
        {
            SeattleTimesArticle seattleTimesArticle = new SeattleTimesArticle(articleResponse);
            if (seattleTimesArticle.PublishDate.Date < DateTime.Now.Date)
            {
                log.Info("article post is at least a day old, will make newsbank edit.");

                // Awaited (previously fire-and-forget) so a failing lookup, edit or table write reaches
                // the catch block below instead of being lost.
                string commentLine = await EditForNewsbank.GetCommentLine(new ArticlePost(seattleTimesArticle, comment), log, newsBankClient);
                if (!String.IsNullOrEmpty(commentLine))
                {
                    EditForNewsbank.EditComment(commentLine, comment);
                    log.Info("article post has been edited.");
                }
                else
                {
                    log.Info("commentline null or empty will store article post");
                    articleTable.Execute(TableOperation.InsertOrReplace(new ArticlePost(seattleTimesArticle, comment)));
                }
            }
            else
            {
                log.Info("will store article post");
                articleTable.Execute(TableOperation.InsertOrReplace(new ArticlePost(seattleTimesArticle, comment)));
            }
        }
    }
    catch (Exception e)
    {
        log.Error("", e);

        // If the failure happened before the article download was awaited, release its response in the
        // background and mark any fault as observed.
        pendingArticleDownload?.ContinueWith(
            t =>
            {
                if (t.Status == TaskStatus.RanToCompletion)
                {
                    t.Result?.Dispose();
                }
                else if (t.IsFaulted)
                {
                    t.Exception.Handle(inner => true);
                }
            },
            TaskScheduler.Default);

        throw;
    }
}