コード例 #1
0
ファイル: Function1.cs プロジェクト: czf/ArchiveBot
        /// <summary>
        /// Timer-triggered entry point. Signs in to reddit (reusing the OAuth token cached in the
        /// "oauth" table while it is still valid), searches r/SeattleWA for seattletimes.com posts
        /// from the last day and passes each post to <c>ProcessPost</c>, stopping at the first
        /// hidden post. <c>CheckMail</c> is started before the search and awaited afterwards.
        /// </summary>
        /// <param name="myTimer">Timer trigger information; not read by this method.</param>
        /// <param name="log">Logger used for progress and diagnostics.</param>
        /// <exception cref="Exception">No reddit session could be established.</exception>
        /// <exception cref="ApplicationException">At least one post was not processed successfully.</exception>
        public static async Task Run([TimerTrigger("00 */10 * * * *")] TimerInfo myTimer, ILogger log)
        {
            if (!_hasRunInit)
            {
                Init();
            }

            CloudTable oauthTable = CloudStorageAccount
                                    .Parse(_storage)
                                    .CreateCloudTableClient()
                                    .GetTableReference("oauth");

            CloudTable articleTable = CloudStorageAccount
                                      .Parse(_storage)
                                      .CreateCloudTableClient()
                                      .GetTableReference("article");

            if (checkTableExists)
            {
                oauthTable.CreateIfNotExists();
                articleTable.CreateIfNotExists();

                // Clear the static flag once both tables are known to exist. The original assigned
                // true here, which left the flag set and repeated the existence checks on every run.
                checkTableExists = false;
            }

            RedditOAuth result = (RedditOAuth)oauthTable
                                 .Execute(TableOperation.Retrieve <RedditOAuth>("reddit", _user))
                                 .Result;

            //https://blog.maartenballiauw.be/post/2012/10/08/what-partitionkey-and-rowkey-are-for-in-windows-azure-table-storage.html
            //https://www.red-gate.com/simple-talk/cloud/cloud-data/an-introduction-to-windows-azure-table-storage/

            // A cached token past its refresh time is discarded so a fresh login happens below.
            if (result?.GetNewToken < DateTimeOffset.Now)
            {
                result = null;
                log.LogInformation("need a new token");
            }

            Reddit r                = null;
            bool   saveToken        = false;
            bool   tryLogin         = false;
            int    tryLoginAttempts = 2;

            do
            {
                tryLoginAttempts--;
                tryLogin = false;
                if (result == null)
                {
                    // No usable cached token: log in with credentials and remember the new token.
                    BotWebAgent agent = new BotWebAgent(_user, _pass, _clientId, _secret, "https://www.reddit.com/user/somekindofbot0000/");
                    result = new RedditOAuth()
                    {
                        Token = agent.AccessToken, GetNewToken = DateTimeOffset.Now.AddMinutes(57), PartitionKey = "reddit", RowKey = _user
                    };
                    r         = new Reddit(agent, true);
                    saveToken = true;
                }
                else
                {
                    try
                    {
                        r = new Reddit(result.Token);
                    }
                    catch (AuthenticationException)
                    {
                        // Cached token was rejected: drop it and retry with a credential login.
                        result   = null;
                        tryLogin = true;
                    }
                    catch (WebException w) when (w.Status == WebExceptionStatus.ProtocolError &&
                                                 (w.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.Unauthorized)
                    {
                        // HTTP 401 is treated like an authentication failure; any other
                        // WebException does not match the filter and propagates to the caller.
                        result   = null;
                        tryLogin = true;
                    }
                }
            } while (tryLogin && tryLoginAttempts > 0);

            if (r == null)
            {
                throw new Exception("couldn't get logged in");
            }

            if (saveToken)
            {
                oauthTable.Execute(TableOperation.InsertOrReplace(result));
                log.LogInformation("saving token");
            }

            NewsBankClient newsBankClient = new NewsBankClient(
                new EnvironmentVariableEZProxySignInUriProvider(),
                EditForNewsbank._credProvider,
                new EnvironmentVariableProductBaseUriProvider(),
                new BasicCanLog(log));

            // Started here and awaited after the posts have been processed.
            Task <HttpResponseMessage> checkMailTask = CheckMail(r, log, client);

            //https://www.reddit.com/r/SeattleWA/search?q=%28+site%3Aseattletimes.com+Subreddit%3ASeattleWA+%29&sort=new&t=day
            Listing <Post> posts =
                r.AdvancedSearch(x => x.Subreddit == "SeattleWA" &&
                                 x.Site == "seattletimes.com"
                                 , Sorting.New, TimeSorting.Day);

            bool allPostsSuccess = true;

            using (WaybackClient waybackMachine = new WaybackClient())
            {
                // Enumeration stops at the first hidden post.
                foreach (Post p in posts.TakeWhile(x => !x.IsHidden))
                {
                    allPostsSuccess &= await ProcessPost(p, waybackMachine, log, articleTable, newsBankClient);
                }
            }

            if (checkMailTask.Status < TaskStatus.RanToCompletion)
            {
                log.LogInformation("waiting for checkmail");
                await checkMailTask;
            }
            else
            {
                log.LogInformation(checkMailTask.Status.ToString());
                if (checkMailTask.IsFaulted)
                {
                    // The task already finished with an error. As before it does not fail the
                    // run, but the exception is now logged instead of being dropped.
                    log.LogError(checkMailTask.Exception, "checkmail failed");
                }
            }

            log.LogInformation($"C# Timer trigger function executed at: {DateTime.Now}");

            if (!allPostsSuccess)
            {
                throw new ApplicationException("Not all Posts were processed successfully");
            }
        }
コード例 #2
0
ファイル: Function1.cs プロジェクト: czf/ArchiveBot
        /// <summary>
        /// Handles one reddit post: hides it (unless <c>Debug</c> is set), obtains a Wayback Machine
        /// snapshot of the linked article, comments on the post with the archive link, then either
        /// adds the NewsBank link to that comment or stores the article in
        /// <paramref name="articleTable"/>.
        /// </summary>
        /// <param name="p">Post whose URL is archived and which receives the comment.</param>
        /// <param name="waybackClient">Client used to look up or create the archive.org snapshot.</param>
        /// <param name="log">Logger used for progress and diagnostics.</param>
        /// <param name="articleTable">Table that receives article posts which could not be edited yet.</param>
        /// <param name="newsBankClient">Client passed to <c>EditForNewsbank.GetCommentLine</c>.</param>
        /// <returns><c>true</c> when every step completed; <c>false</c> when any step threw.</returns>
        private static async Task <bool> ProcessPost(Post p, WaybackClient waybackClient, ILogger log, CloudTable articleTable, NewsBankClient newsBankClient)
        {
            try
            {
                if (!Debug)
                {
                    p.Hide();
                }

                log.LogInformation(p.Url.ToString());
                Uri target = new Uri(p.Url.GetComponents(UriComponents.Host | UriComponents.Path | UriComponents.Scheme, UriFormat.SafeUnescaped));

                // Start both requests before awaiting either so they overlap. The tasks are not
                // wrapped in using blocks: Task.Dispose throws while a task is still running,
                // which could replace the real failure with an InvalidOperationException.
                Task <AvailableResponse>   availableTask     = waybackClient.AvailableAsync(target);
                Task <HttpResponseMessage> targetGetResponse = client.GetAsync(target);

                AvailableResponse availableResponse = await availableTask;

                Uri   archivedUrl = null;
                short attempts    = 2;
                bool  success     = false;
                do
                {
                    attempts--;
                    if (availableResponse?.archived_snapshots?.closest?.available == true)
                    {
                        archivedUrl = availableResponse.archived_snapshots.closest.url;
                        log.LogInformation("using available snapshot.");
                        success = true;
                    }
                    else
                    {
                        log.LogInformation("creating snapshot.");
                        archivedUrl = await waybackClient.SaveAsync(target);

                        // Confirm that the URL returned by the save request actually resolves.
                        short validationAttempts = 2;
                        do
                        {
                            validationAttempts--;
                            using (HttpResponseMessage responseCheck = await client.GetAsync(archivedUrl))
                            {
                                if (!responseCheck.IsSuccessStatusCode || responseCheck.StatusCode == HttpStatusCode.NotFound)
                                {
                                    log.LogWarning($"404 returned from archive.org using provided response url. \nstatuscode:{responseCheck.StatusCode}  \narchiveURL:{archivedUrl}");

                                    // Short pause before re-checking, without blocking a thread.
                                    await Task.Delay(100);
                                }
                                else
                                {
                                    log.LogInformation("check returned success.");
                                    success = true;
                                }
                            }
                        } while (validationAttempts > 0 && !success);
                    }
                } while (attempts > 0 && !success);

                if (!success)
                {
                    throw new ApplicationException("Wayback machine wouldn't cache content.");
                }

                string msg =
                    $@"[Archive.org version.]({archivedUrl.ToString()})

:0:

----
^^You ^^can ^^support ^^Archive.org ^^via [^^(Amazon) ^^(Smile)](https://smile.amazon.com/ch/94-3242767)  
^^You ^^can ^^support ^^Seattle ^^Public ^^Library ^^via [^^(Amazon) ^^(Smile)](https://smile.amazon.com/ch/91-1140642)  
^^I'm ^^a ^^bot, ^^beep ^^boop [ ^^((fork) ^^(me) ^^(on) ^^(github))](https://github.com/czf/ArchiveBot)";

                log.LogInformation(msg);

                // In Debug mode nothing is posted and the comment stays null.
                Comment comment = null;
                if (!Debug)
                {
                    comment = p.Comment(msg);
                }

                using (HttpResponseMessage articleResponse = await targetGetResponse)
                {
                    log.LogInformation("start newsbank");
                    if (articleResponse == null)
                    {
                        log.LogInformation("articleResponse is null");
                    }

                    SeattleTimesArticle seattleTimesArticle = new SeattleTimesArticle(articleResponse);
                    if (seattleTimesArticle.PublishDate.Date < DateTime.Now.Date)
                    {
                        log.LogInformation("article post is at least a day old, will make newsbank edit.");

                        // Awaited rather than fire-and-forget so the work finishes before this
                        // method returns and its failures are observed.
                        string commentLine = null;
                        try
                        {
                            commentLine = await EditForNewsbank.GetCommentLine(new ArticlePost(seattleTimesArticle, comment), log, newsBankClient);
                        }
                        catch (Exception newsBankError)
                        {
                            // A failed lookup falls through to the "store" branch below instead
                            // of silently losing the article.
                            log.LogError(newsBankError, "newsbank lookup failed, will store article post");
                        }

                        if (!String.IsNullOrEmpty(commentLine))
                        {
                            EditForNewsbank.EditComment(commentLine, comment);
                            log.LogInformation("article post has been edited.");
                        }
                        else
                        {
                            log.LogInformation("commentline null or empty will store article post");
                            articleTable.Execute(TableOperation.InsertOrReplace(new ArticlePost(seattleTimesArticle, comment)));
                        }
                    }
                    else
                    {
                        log.LogInformation("will store article post");
                        articleTable.Execute(TableOperation.InsertOrReplace(new ArticlePost(seattleTimesArticle, comment)));
                    }
                }

                return(true);
            }
            catch (Exception e)
            {
                // Exception goes first: LogError("", e) would bind e as a format argument of an
                // empty template and the exception would never be logged.
                log.LogError(e, "failed to process post {PostUrl}", p?.Url);
                return(false);
            }
        }
コード例 #3
0
ファイル: EditForNewsbank.cs プロジェクト: czf/ArchiveBot
        /// <summary>
        /// Searches NewsBank (World News product, Seattle Times web edition articles) for the article
        /// described by <paramref name="articlePost"/>, matching on author, quoted headline and date,
        /// and builds the markdown line that links to the first search result.
        /// </summary>
        /// <param name="articlePost">Article whose author, headline and date form the search.</param>
        /// <param name="log">Logger used for progress and diagnostics.</param>
        /// <param name="newsBankClient">Client that performs the search.</param>
        /// <returns>
        /// The markdown line for the first search result, or <see cref="string.Empty"/> when the
        /// search produced no usable result.
        /// </returns>
        /// <remarks>Exceptions other than <see cref="NullReferenceException"/> are logged and rethrown.</remarks>
        internal static async Task <string> GetCommentLine(ArticlePost articlePost, ILogger log, NewsBankClient newsBankClient)
        {
            log.LogInformation("GetCommentLine");

            try
            {
                SearchRequest request = new SearchRequest()
                {
                    Product      = Product.WorldNews,
                    Publications = new List <Publication>()
                    {
                        Publication.SeattleTimesWebEditionArticles
                    },
                    SearchParameter0 = new SearchParameter()
                    {
                        Field = SearchField.Author, Value = articlePost.ArticleAuthor.Replace("/", string.Empty)
                    },
                    SearchParameter1 = new SearchParameter()
                    {
                        Field = SearchField.Headline, Value = $"\"{articlePost.ArticleHeadline}\"", ParameterCompoundOperator = CompoundOperator.AND
                    },
                    SearchParameter2 = new SearchParameter()
                    {
                        // NOTE(review): ToShortDateString depends on the current culture - confirm
                        // this is the date format the NewsBank search expects.
                        Field = SearchField.Date, Value = articlePost.ArticleDate.ToShortDateString(), ParameterCompoundOperator = CompoundOperator.AND
                    }
                };

                SearchResult searchResult = await newsBankClient.Search(request);

                // Guard the dereference: a missing result means "nothing found", the same
                // outcome as the NullReferenceException handler below.
                if (searchResult?.FirstSearchResultItem == null)
                {
                    log.LogInformation("search returned no result item, " + articlePost.CommentUri);
                    return(string.Empty);
                }

                return($"[NewsBank version]({searchResult.FirstSearchResultItem.ResultItemUri}) via SPL [^(SPL) ^(account) ^(required)](https://www.spl.org/using-the-library/get-started/get-started-with-a-library-card/library-card-application)");
            }
            catch (NullReferenceException nullRefEx)  //not the best option.
            {
                // Passing the exception object records message and stack trace in one entry.
                log.LogError(nullRefEx, "possible no Web edition result, {CommentUri}", articlePost.CommentUri);
                return(string.Empty);
            }
            catch (Exception e)
            {
                log.LogError(e, "NewsBank search failed");
                throw;
            }
        }
コード例 #4
0
ファイル: EditForNewsbank.cs プロジェクト: czf/ArchiveBot
        /// <summary>
        /// Timer-triggered entry point. Reads the article posts stored in the "article" table whose
        /// article date falls within the last 27 days (excluding today), signs in to reddit
        /// (reusing the OAuth token cached in the "oauth" table while it is still valid) and, for
        /// each stored post, tries to add the NewsBank link to the bot's comment.
        /// </summary>
        /// <param name="myTimer">Timer trigger information; not read by this method.</param>
        /// <param name="log">Logger used for progress and diagnostics.</param>
        /// <exception cref="Exception">No reddit session could be established.</exception>
        public static async Task Run([TimerTrigger("0 0 07 * * *")] TimerInfo myTimer, ILogger log)
        {
            NewsBankClient newsBankClient = new NewsBankClient(
                new EnvironmentVariableEZProxySignInUriProvider(),
                _credProvider,
                new EnvironmentVariableProductBaseUriProvider(),
                new BasicCanLog(log)
                );

            CloudTable articleTable = CloudStorageAccount
                                      .Parse(_storage)
                                      .CreateCloudTableClient()
                                      .GetTableReference("article");

            if (checkTableExists)
            {
                articleTable.CreateIfNotExists();
            }

            DateTime today = new DateTime(DateTime.Today.Ticks, DateTimeKind.Utc);
            TableQuery <ArticlePost> articlesPublishedBeforeToday = articleTable
                                                                    .CreateQuery <ArticlePost>()
                                                                    .Where(x => x.ArticleDate < today && x.ArticleDate > today.AddDays(-27))
                                                                    .AsTableQuery();

            CloudTable oauthTable = CloudStorageAccount
                                    .Parse(_storage)
                                    .CreateCloudTableClient()
                                    .GetTableReference("oauth");

            if (checkTableExists)
            {
                // Both tables have been checked once this succeeds; skip the checks on later runs.
                checkTableExists = false;
                oauthTable.CreateIfNotExists();
            }

            RedditOAuth result = (RedditOAuth)oauthTable
                                 .Execute(TableOperation.Retrieve <RedditOAuth>("reddit", _user))
                                 .Result;

            //https://blog.maartenballiauw.be/post/2012/10/08/what-partitionkey-and-rowkey-are-for-in-windows-azure-table-storage.html
            //https://www.red-gate.com/simple-talk/cloud/cloud-data/an-introduction-to-windows-azure-table-storage/

            // A cached token past its refresh time is discarded so a fresh login happens below.
            if (result?.GetNewToken < DateTimeOffset.Now)
            {
                result = null;
                log.LogInformation("need a new token");
            }

            Reddit r                = null;
            bool   saveToken        = false;
            bool   tryLogin         = false;
            int    tryLoginAttempts = 2;

            do
            {
                tryLoginAttempts--;
                tryLogin = false;
                if (result == null)
                {
                    // No usable cached token: log in with credentials and remember the new token.
                    BotWebAgent agent = new BotWebAgent(_user, _pass, _clientId, _secret, "https://www.reddit.com/user/somekindofbot0000/");
                    result = new RedditOAuth()
                    {
                        Token = agent.AccessToken, GetNewToken = DateTimeOffset.Now.AddMinutes(57), PartitionKey = "reddit", RowKey = _user
                    };
                    r         = new Reddit(agent, true);
                    saveToken = true;
                }
                else
                {
                    try
                    {
                        r = new Reddit(result.Token);
                    }
                    catch (AuthenticationException)
                    {
                        // Cached token was rejected: drop it and retry with a credential login.
                        result   = null;
                        tryLogin = true;
                    }
                    catch (WebException w) when (w.Status == WebExceptionStatus.ProtocolError &&
                                                 (w.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.Unauthorized)
                    {
                        // HTTP 401 is treated like an authentication failure; any other
                        // WebException does not match the filter and propagates to the caller.
                        result   = null;
                        tryLogin = true;
                    }
                }
            } while (tryLogin && tryLoginAttempts > 0);

            if (r == null)
            {
                throw new Exception("couldn't get logged in");
            }

            if (saveToken)
            {
                oauthTable.Execute(TableOperation.InsertOrReplace(result));
                log.LogInformation("saving token");
            }

            List <Task> updateCommentTasks = new List <Task>();

            // Each update is started without being awaited so the posts are handled concurrently.
            foreach (ArticlePost ap in articlesPublishedBeforeToday)
            {
                updateCommentTasks.Add(UpdateStoredArticleComment(ap, r, articleTable, log, newsBankClient));
            }

            await Task.WhenAll(updateCommentTasks.ToArray());

            log.LogInformation("AwaitWhenALL " + updateCommentTasks.Count.ToString());
        }

        /// <summary>
        /// Looks up the NewsBank link for one stored article post. When a link is found the reddit
        /// comment is edited and the post is deleted from <paramref name="articleTable"/>; otherwise
        /// <c>TryUpdateArticleData</c> is consulted and the search repeated while it asks for a retry.
        /// </summary>
        private static async Task UpdateStoredArticleComment(ArticlePost ap, Reddit r, CloudTable articleTable, ILogger log, NewsBankClient newsBankClient)
        {
            string commentLine = await GetCommentLine(ap, log, newsBankClient);

            while (String.IsNullOrEmpty(commentLine))
            {
                log.LogInformation("Empty CommentLine, will check headline.");

                // NOTE(review): presumably TryUpdateArticleData refreshes ap's author/headline and
                // returns true when a new search is worthwhile - confirm against its definition.
                bool retry = await TryUpdateArticleData(ap, r, articleTable, log);
                if (!retry)
                {
                    return;
                }

                log.LogInformation($"author: {ap.ArticleAuthor} ---- headline: {ap.ArticleHeadline}");

                // Search again. The previous loop re-read the first, already empty, search
                // result on every retry, so a retry could never find the article.
                commentLine = await GetCommentLine(ap, log, newsBankClient);
            }

            // The comment is fetched only once there is something to write into it.
            Comment c = r.GetComment(new Uri("https://www.reddit.com" + ap.CommentUri));
            EditComment(commentLine, c);
            articleTable.Execute(TableOperation.Delete(ap));
        }
コード例 #5
0
        /// <summary>
        /// Handles one reddit post: hides it (unless <c>Debug</c> is set), obtains a Wayback Machine
        /// snapshot of the linked article, comments on the post with the archive link, then either
        /// adds the NewsBank link to that comment or stores the article in
        /// <paramref name="articleTable"/>.
        /// </summary>
        /// <param name="p">Post whose URL is archived and which receives the comment.</param>
        /// <param name="waybackClient">Client used to look up or create the archive.org snapshot.</param>
        /// <param name="log">Trace writer used for progress and diagnostics.</param>
        /// <param name="articleTable">Table that receives article posts which could not be edited yet.</param>
        /// <param name="newsBankClient">Client passed to <c>EditForNewsbank.GetCommentLine</c>.</param>
        /// <remarks>Any exception is logged and then rethrown to the caller.</remarks>
        private static async Task ProcessPost(Post p, WaybackClient waybackClient, TraceWriter log, CloudTable articleTable, NewsBankClient newsBankClient)
        {
            try
            {
                if (!Debug)
                {
                    p.Hide();
                }

                log.Info(p.Url.ToString());
                Uri target = new Uri(p.Url.GetComponents(UriComponents.Host | UriComponents.Path | UriComponents.Scheme, UriFormat.SafeUnescaped));

                // Start both requests before awaiting either so they overlap. The tasks are not
                // wrapped in using blocks: Task.Dispose throws while a task is still running,
                // which could replace the real failure with an InvalidOperationException.
                Task <AvailableResponse>   availableTask     = waybackClient.AvailableAsync(target);
                Task <HttpResponseMessage> targetGetResponse = client.GetAsync(target);

                AvailableResponse availableResponse = await availableTask;

                Uri  archivedUrl = null;
                int  attempts    = 2;
                bool success     = false;
                do
                {
                    attempts--;
                    if (availableResponse?.archived_snapshots?.closest?.available == true)
                    {
                        archivedUrl = availableResponse.archived_snapshots.closest.url;
                        log.Info("using available snapshot.");
                        success = true;
                    }
                    else
                    {
                        log.Info("creating snapshot.");
                        archivedUrl = await waybackClient.SaveAsync(target);

                        // Confirm that the URL returned by the save request actually resolves.
                        using (HttpResponseMessage responseCheck = await client.GetAsync(archivedUrl))
                        {
                            if (!responseCheck.IsSuccessStatusCode || responseCheck.StatusCode == HttpStatusCode.NotFound)
                            {
                                log.Warning($"404 returned from archive.org using provided response url. \nstatuscode:{responseCheck.StatusCode}  \narchiveURL:{archivedUrl}");
                            }
                            else
                            {
                                log.Info("check returned success.");
                                success = true;
                            }
                        }
                    }
                } while (attempts > 0 && !success);

                if (!success)
                {
                    throw new ApplicationException("Wayback machine wouldn't cache content.");
                }

                string msg =
                    $@"[Archive.org version.]({archivedUrl.ToString()})

:0:

----
^^I'm ^^a ^^bot, ^^beep ^^boop";

                log.Info(msg);

                // In Debug mode nothing is posted and the comment stays null.
                Comment comment = null;
                if (!Debug)
                {
                    comment = p.Comment(msg);
                }

                using (HttpResponseMessage articleResponse = await targetGetResponse)
                {
                    SeattleTimesArticle seattleTimesArticle = new SeattleTimesArticle(articleResponse);
                    if (seattleTimesArticle.PublishDate.Date < DateTime.Now.Date)
                    {
                        log.Info("article post is at least a day old, will make newsbank edit.");

                        // Awaited rather than fire-and-forget so the work finishes before this
                        // method returns and its failures are observed.
                        string commentLine = null;
                        try
                        {
                            commentLine = await EditForNewsbank.GetCommentLine(new ArticlePost(seattleTimesArticle, comment), log, newsBankClient);
                        }
                        catch (Exception newsBankError)
                        {
                            // A failed lookup falls through to the "store" branch below instead
                            // of silently losing the article.
                            log.Error("newsbank lookup failed, will store article post", newsBankError);
                        }

                        if (!String.IsNullOrEmpty(commentLine))
                        {
                            EditForNewsbank.EditComment(commentLine, comment);
                            log.Info("article post has been edited.");
                        }
                        else
                        {
                            log.Info("commentline null or empty will store article post");
                            articleTable.Execute(TableOperation.InsertOrReplace(new ArticlePost(seattleTimesArticle, comment)));
                        }
                    }
                    else
                    {
                        log.Info("will store article post");
                        articleTable.Execute(TableOperation.InsertOrReplace(new ArticlePost(seattleTimesArticle, comment)));
                    }
                }
            }
            catch (Exception e)
            {
                log.Error("", e);
                throw;
            }
        }