/// <summary>
/// Returns the largest <c>hn_article_id</c> stored in the local article table,
/// or 0 when the table holds no articles.
/// </summary>
/// <returns>The highest stored article id, or 0 if there are none.</returns>
static int GetMaxArticleIdInLocalStore()
{
    using (var context = new SlackerNewsEntities())
    {
        // Projecting to int? lets Max yield null for an empty table so the
        // "?? 0" fallback applies. A null-conditional on the set itself only
        // guards against a null set, not against an empty one.
        return context.articles.Max(t => (int?)t.hn_article_id) ?? 0;
    }
}
/// <summary>
/// Fetches the item <paramref name="hnArticleId"/> from the Hacker News API, copies its
/// score onto the matching local article row, saves it, and then runs
/// <c>UpdateSectionForArticle</c> for that article.
/// </summary>
/// <param name="hnArticleId">Hacker News item id of the article to refresh.</param>
/// <exception cref="InvalidOperationException">
/// The API returned no item, or the item is not a story. <c>Single</c> also throws
/// when the local store does not contain exactly one matching row.
/// </exception>
/// <remarks>
/// When the REST call reports an <c>ErrorException</c>, it is logged and rethrown as-is.
/// </remarks>
static void UpdateStatsForArticle(int hnArticleId)
{
    var logger = NLog.LogManager.GetCurrentClassLogger();
    logger.Trace($"Updating stats for article {hnArticleId}");

    var client = new RestClient(HackernewsApiUrl);
    var request = new RestRequest($"item/{hnArticleId}.json");
    var response = client.Execute<Item>(request);

    if (response.ErrorException != null)
    {
        logger.Error(response.ErrorException);
        throw response.ErrorException;
    }

    // A successful call can still carry no payload (e.g. the id does not exist);
    // fail with a descriptive error instead of a NullReferenceException below.
    var item = response.Data;
    if (item == null)
    {
        throw new InvalidOperationException($"ObjectId {hnArticleId} returned no data, cannot update stats");
    }

    if (item.type != ItemType.story)
    {
        throw new InvalidOperationException($"ObjectId {hnArticleId} is not a story, cannot update stats");
    }

    using (var context = new SlackerNewsEntities())
    {
        // Hoist the id into a local so the query only captures a simple value.
        var itemId = item.id;
        var dbEntity = context.articles.Single(t => t.hn_article_id == itemId);
        dbEntity.score = item.score;
        context.SaveChanges();

        // Update article with section
        UpdateSectionForArticle((int)dbEntity.hn_article_id);
    }
}
/// <summary>
/// Returns the smallest <c>hn_article_id</c> among local articles whose
/// <c>create_datetime</c> is later than <paramref name="hoursSinceArticleCreated"/> hours ago,
/// or 0 when no article falls inside that window.
/// </summary>
/// <param name="hoursSinceArticleCreated">Size of the look-back window, in hours.</param>
/// <returns>The lowest matching article id, or 0 if nothing matches.</returns>
static int GetMinArticleIdCreatedLastXHoursInLocalStore(int hoursSinceArticleCreated)
{
    using (var context = new SlackerNewsEntities())
    {
        // NOTE(review): the cutoff uses local time; confirm create_datetime is stored in local time as well.
        DateTime createdSince = DateTime.Now.AddHours(-hoursSinceArticleCreated);

        // Where() never returns null, so a null-conditional cannot detect "no rows".
        // Projecting to int? makes Min yield null for an empty result, which the
        // "?? 0" fallback then turns into the documented 0.
        return context.articles
            .Where(t => t.create_datetime > createdSince)
            .Min(t => (int?)t.hn_article_id) ?? 0;
    }
}
/// <summary>
/// Fills in the tags and the semantic summary of a stored article when they are blank.
/// Nothing is fetched if the article's score is below
/// <c>Constants.ScoreThresholdForSemanticSummaryApi</c>.
/// </summary>
/// <param name="hnArticleId">Hacker News id of the stored article.</param>
/// <exception cref="Exception">Thrown with "Invalid url" when <c>IsUrlValid</c> rejects the article url.</exception>
/// <remarks>
/// Failures while fetching tags or the summary are logged and swallowed, so one
/// failing lookup does not prevent the other from being attempted.
/// </remarks>
static void UpdateSemanticAttributesForArticle(int hnArticleId)
{
    var log = NLog.LogManager.GetCurrentClassLogger();
    log.Info("Getting semantic attributes for article " + hnArticleId);

    using (var db = new SlackerNewsEntities())
    {
        var article = db.articles.Single(t => t.hn_article_id == hnArticleId);

        if (article.score < Constants.ScoreThresholdForSemanticSummaryApi)
        {
            log.Info($"Article score is {article.score}. We only run API calls to get semantic data when score exceeds {Constants.ScoreThresholdForSemanticSummaryApi}");
            return;
        }

        if (!IsUrlValid(article.url))
        {
            throw new Exception("Invalid url");
        }

        // Tags: only fetched when the article has none yet.
        bool alreadyTagged = !string.IsNullOrWhiteSpace(article.tags);
        if (alreadyTagged)
        {
            log.Info("Article already has tags, continuing");
        }
        else
        {
            try
            {
                article.tags = GetAlchemyTagsForUrl(article.url);
                db.SaveChanges();
            }
            catch (Exception tagError)
            {
                // Keep going so we can try the semantic summary too
                log.Error(tagError);
            }
        }

        // Summary: only fetched when the article has none yet.
        bool alreadySummarized = !string.IsNullOrWhiteSpace(article.semantic_summary);
        if (alreadySummarized)
        {
            log.Info("Article already has summary, continuing");
        }
        else
        {
            try
            {
                article.semantic_summary = GetSemantriaSummaryForUrl(article.url);
                db.SaveChanges();
            }
            catch (Exception summaryError)
            {
                log.Error(summaryError);
            }
        }
    }
}
/// <summary>
/// Classifies a stored article into a section based on its title and saves the result.
/// The classification only runs when the article's score exceeds
/// <c>Constants.ScoreThresholdForClassificationApi</c> and
/// <c>api_fetch_date_classification</c> is still null.
/// </summary>
/// <param name="hnArticleId">Hacker News id of the stored article.</param>
static void UpdateSectionForArticle(int hnArticleId)
{
    NLog.LogManager.GetCurrentClassLogger().Info("Updating section for article " + hnArticleId);

    using (var db = new SlackerNewsEntities())
    {
        var article = db.articles.Single(t => t.hn_article_id == hnArticleId);

        // Negate the combined condition rather than each comparison so the
        // outcome is the same even if score is a nullable column.
        bool needsClassification =
            article.score > Constants.ScoreThresholdForClassificationApi
            && article.api_fetch_date_classification == null;
        if (!needsClassification)
        {
            return;
        }

        var sectionClassifier = new SectionClassifier();
        var detectedSection = sectionClassifier.GetSectionFromText(article.title);

        article.section_id = (int)detectedSection;
        article.api_fetch_date_classification = DateTime.UtcNow;
        db.SaveChanges();
    }
}
/// <summary>
/// Downloads the item <paramref name="hnArticleId"/> from the Hacker News API and, when it
/// is a story, stores it in the local article table under the General section.
/// Items that are missing or are not stories are logged and skipped.
/// </summary>
/// <param name="hnArticleId">Hacker News item id to download.</param>
/// <exception cref="InvalidOperationException">
/// An article with the same <c>hn_article_id</c> is already stored.
/// </exception>
/// <remarks>
/// When the REST call reports an <c>ErrorException</c>, it is logged and rethrown as-is.
/// </remarks>
static void GetAndStoreArticle(int hnArticleId)
{
    var logger = NLog.LogManager.GetCurrentClassLogger();
    logger.Trace($"Getting object id {hnArticleId} from {HackernewsApiUrl}");

    var client = new RestClient(HackernewsApiUrl);
    var request = new RestRequest($"item/{hnArticleId}.json");
    var response = client.Execute<Item>(request);

    if (response.ErrorException != null)
    {
        logger.Error(response.ErrorException);
        throw response.ErrorException;
    }

    // A successful call can still carry no payload (e.g. the id does not exist).
    // Treat it like any other non-story object instead of failing with a
    // NullReferenceException.
    var item = response.Data;
    if (item == null)
    {
        logger.Trace($"Object id {hnArticleId} returned no data");
        return;
    }

    if (item.type != ItemType.story)
    {
        logger.Trace($"Object id {hnArticleId} is not a story, type is {item.type}");
        return;
    }

    // Convert once and reuse the entity for both the duplicate check and the insert.
    var article = item.ToDbEntity();
    var articleId = article.hn_article_id;

    using (var context = new SlackerNewsEntities())
    {
        if (context.articles.Any(t => t.hn_article_id == articleId))
        {
            logger.Trace($"Object id {hnArticleId} already exists");
            throw new InvalidOperationException("Article already downloaded: " + articleId);
        }

        logger.Trace($"Saving object id {hnArticleId} to database");
        article.section_id = (int)Constants.Section.General;
        context.articles.Add(article);
        context.SaveChanges();
    }
}
/// <summary>
/// Refreshes the stats of every locally stored article whose id is at or above the
/// lowest id created within the last <paramref name="hoursSinceArticleCreated"/> hours.
/// A failure on one article is logged and does not stop the remaining updates.
/// </summary>
/// <param name="hoursSinceArticleCreated">Size of the look-back window, in hours (default 240).</param>
static void UpdateStatsForRecentArticles(int hoursSinceArticleCreated = 240)
{
    int minLocalArticleIdInLastXHours = GetMinArticleIdCreatedLastXHoursInLocalStore(hoursSinceArticleCreated);

    // Only update recent articles; 0 is treated as "nothing recent to update".
    if (minLocalArticleIdInLastXHours == 0)
    {
        return;
    }

    List<int> articleIds;

    // Only run API queries for the subset of objects that are actually articles.
    // Furthermore, only run update stats for articles we have successfully saved to the database.
    using (var context = new SlackerNewsEntities())
    {
        articleIds = context.articles
            .Where(t => t.hn_article_id >= minLocalArticleIdInLastXHours)
            .Select(t => (int)t.hn_article_id)
            .ToList();
    }

    foreach (int articleId in articleIds)
    {
        try
        {
            UpdateStatsForArticle(articleId);
        }
        catch (Exception ex)
        {
            // Log and continue
            NLog.LogManager.GetCurrentClassLogger().Error(ex);
        }
    }
}