Exemple #1
0
 /// <summary>
 /// Returns the largest <c>hn_article_id</c> found in the local articles set,
 /// or 0 when there is no article to take a maximum from.
 /// </summary>
 /// <returns>The maximum stored article id, or 0 if none exists.</returns>
 static int GetMaxArticleIdInLocalStore()
 {
     using (var context = new SlackerNewsEntities())
     {
         // Project to int? so Max yields null for an empty set and the ?? fallback applies.
         // A null-conditional on the set itself does not protect against emptiness:
         // Max over an empty sequence of a non-nullable value type throws instead.
         return context.articles.Max(t => (int?)t.hn_article_id) ?? 0;
     }
 }
Exemple #2
0
        /// <summary>
        /// Requests item <paramref name="hnArticleId"/> from the API at <c>HackernewsApiUrl</c>,
        /// copies its score onto the matching locally stored article, saves it, and then
        /// calls <c>UpdateSectionForArticle</c> for that article.
        /// </summary>
        /// <param name="hnArticleId">Id of the item to refresh; a matching article must already be stored locally.</param>
        /// <exception cref="InvalidOperationException">
        /// The response carried no item data, or the local store does not contain exactly one matching article.
        /// </exception>
        /// <exception cref="Exception">
        /// The request failed (the response's error exception is logged and rethrown),
        /// or the returned item is not a story.
        /// </exception>
        static void UpdateStatsForArticle(int hnArticleId)
        {
            var logger = NLog.LogManager.GetCurrentClassLogger();
            logger.Trace($"Updating stats for article {hnArticleId}");

            var client   = new RestClient(HackernewsApiUrl);
            var request  = new RestRequest($"item/{hnArticleId}.json");
            var response = client.Execute<Item>(request);

            if (response.ErrorException != null)
            {
                logger.Error(response.ErrorException);
                throw response.ErrorException;
            }

            // A response without an error can still carry no deserialized item;
            // fail with a descriptive message rather than a NullReferenceException below.
            var item = response.Data;
            if (item == null)
            {
                throw new InvalidOperationException($"No item data returned for ObjectId {hnArticleId}, cannot update stats");
            }

            if (item.type != ItemType.story)
            {
                throw new Exception($"ObjectId {hnArticleId} is not a story, cannot update stats");
            }

            // Capture the id in a local so the query closes over a plain value.
            var itemId = item.id;

            using (var context = new SlackerNewsEntities())
            {
                var dbEntity = context.articles.Single(t => t.hn_article_id == itemId);
                dbEntity.score = item.score;
                context.SaveChanges();

                // Update article with section
                UpdateSectionForArticle((int)dbEntity.hn_article_id);
            }
        }
Exemple #3
0
 /// <summary>
 /// Returns the smallest <c>hn_article_id</c> among locally stored articles whose
 /// <c>create_datetime</c> is later than <paramref name="hoursSinceArticleCreated"/> hours
 /// before the current local time, or 0 when no article matches.
 /// </summary>
 /// <param name="hoursSinceArticleCreated">Size of the look-back window, in hours.</param>
 /// <returns>The minimum matching article id, or 0 if there is no match.</returns>
 static int GetMinArticleIdCreatedLastXHoursInLocalStore(int hoursSinceArticleCreated)
 {
     using (var context = new SlackerNewsEntities())
     {
         // NOTE(review): the cutoff uses local time (DateTime.Now); confirm that
         // create_datetime is stored in local time as well.
         DateTime createdSince = DateTime.Now.AddHours(-hoursSinceArticleCreated);

         // Where never returns null, so a null-conditional cannot detect "no rows".
         // Projecting to int? lets Min return null for an empty result, which ?? maps to 0.
         return context.articles
                       .Where(t => t.create_datetime > createdSince)
                       .Min(t => (int?)t.hn_article_id) ?? 0;
     }
 }
Exemple #4
0
        /// <summary>
        /// Fills in the <c>tags</c> and <c>semantic_summary</c> of the locally stored article
        /// <paramref name="hnArticleId"/> when they are blank, saving after each successful fetch.
        /// Nothing is fetched when the article's score is below
        /// <c>Constants.ScoreThresholdForSemanticSummaryApi</c>.
        /// </summary>
        /// <param name="hnArticleId">Id of the locally stored article to enrich.</param>
        /// <exception cref="Exception">The article's url is rejected by <c>IsUrlValid</c>.</exception>
        static void UpdateSemanticAttributesForArticle(int hnArticleId)
        {
            var log = NLog.LogManager.GetCurrentClassLogger();
            log.Info("Getting semantic attributes for article " + hnArticleId);

            using (var db = new SlackerNewsEntities())
            {
                var stored = db.articles.Single(a => a.hn_article_id == hnArticleId);

                if (stored.score < Constants.ScoreThresholdForSemanticSummaryApi)
                {
                    log.Info($"Article score is {stored.score}. We only run API calls to get semantic data when score exceeds {Constants.ScoreThresholdForSemanticSummaryApi}");
                    return;
                }

                if (!IsUrlValid(stored.url))
                {
                    throw new Exception("Invalid url");
                }

                // Tags: fetch only when missing.
                bool hasTags = !string.IsNullOrWhiteSpace(stored.tags);
                if (hasTags)
                {
                    log.Info("Article already has tags, continuing");
                }
                else
                {
                    try
                    {
                        stored.tags = GetAlchemyTagsForUrl(stored.url);
                        db.SaveChanges();
                    }
                    catch (Exception tagError)
                    {
                        // A tag failure is logged only, so the summary step below still runs.
                        log.Error(tagError);
                    }
                }

                // Summary: fetch only when missing.
                bool hasSummary = !string.IsNullOrWhiteSpace(stored.semantic_summary);
                if (hasSummary)
                {
                    log.Info("Article already has summary, continuing");
                }
                else
                {
                    try
                    {
                        stored.semantic_summary = GetSemantriaSummaryForUrl(stored.url);
                        db.SaveChanges();
                    }
                    catch (Exception summaryError)
                    {
                        log.Error(summaryError);
                    }
                }
            }
        }
Exemple #5
0
        /// <summary>
        /// Classifies the locally stored article <paramref name="hnArticleId"/> into a section
        /// from its title and records when that happened. The article is left untouched unless
        /// its score exceeds <c>Constants.ScoreThresholdForClassificationApi</c> and its
        /// <c>api_fetch_date_classification</c> is still null.
        /// </summary>
        /// <param name="hnArticleId">Id of the locally stored article to classify.</param>
        static void UpdateSectionForArticle(int hnArticleId)
        {
            NLog.LogManager.GetCurrentClassLogger().Info("Updating section for article " + hnArticleId);

            using (var db = new SlackerNewsEntities())
            {
                var stored = db.articles.Single(a => a.hn_article_id == hnArticleId);

                bool scoreQualifies    = stored.score > Constants.ScoreThresholdForClassificationApi;
                bool alreadyClassified = stored.api_fetch_date_classification != null;

                if (!scoreQualifies || alreadyClassified)
                {
                    return;
                }

                var sectionClassifier = new SectionClassifier();
                stored.section_id = (int)sectionClassifier.GetSectionFromText(stored.title);
                stored.api_fetch_date_classification = DateTime.UtcNow;
                db.SaveChanges();
            }
        }
Exemple #6
0
        /// <summary>
        /// Requests item <paramref name="hnArticleId"/> from the API at <c>HackernewsApiUrl</c> and,
        /// when it is a story that is not yet stored, saves it to the local store in the
        /// <c>Constants.Section.General</c> section. Items of any other type are only trace-logged.
        /// </summary>
        /// <param name="hnArticleId">Id of the item to download.</param>
        /// <exception cref="InvalidOperationException">The response carried no item data.</exception>
        /// <exception cref="Exception">
        /// The request failed (the response's error exception is logged and rethrown),
        /// or an article with the same id is already stored.
        /// </exception>
        static void GetAndStoreArticle(int hnArticleId)
        {
            var logger = NLog.LogManager.GetCurrentClassLogger();
            logger.Trace($"Getting object id {hnArticleId} from {HackernewsApiUrl}");

            var client   = new RestClient(HackernewsApiUrl);
            var request  = new RestRequest($"item/{hnArticleId}.json");
            var response = client.Execute<Item>(request);

            if (response.ErrorException != null)
            {
                logger.Error(response.ErrorException);
                throw response.ErrorException;
            }

            // A response without an error can still carry no deserialized item;
            // fail with a descriptive message rather than a NullReferenceException below.
            var item = response.Data;
            if (item == null)
            {
                throw new InvalidOperationException($"No item data returned for object id {hnArticleId}");
            }

            if (item.type != ItemType.story)
            {
                logger.Trace($"Object id {hnArticleId} is not a story, type is {item.type}");
                return;
            }

            // Convert once and reuse the entity for both the duplicate check and the insert.
            var article   = item.ToDbEntity();
            var articleId = article.hn_article_id;

            using (var context = new SlackerNewsEntities())
            {
                if (context.articles.Any(t => t.hn_article_id == articleId))
                {
                    logger.Trace($"Object id {hnArticleId} already exists");
                    throw new Exception("Article already downloaded: " + articleId);
                }

                logger.Trace($"Saving object id {hnArticleId} to database");
                article.section_id = (int)Constants.Section.General;
                context.articles.Add(article);
                context.SaveChanges();
            }
        }
Exemple #7
0
        /// <summary>
        /// Calls <c>UpdateStatsForArticle</c> for every locally stored article whose id is at least
        /// the smallest id created within the last <paramref name="hoursSinceArticleCreated"/> hours.
        /// A failure on one article is logged and does not stop the remaining updates.
        /// </summary>
        /// <param name="hoursSinceArticleCreated">Size of the look-back window, in hours. Defaults to 240.</param>
        static void UpdateStatsForRecentArticles(int hoursSinceArticleCreated = 240)
        {
            // The previous version also queried the maximum stored id here but never used it;
            // that extra database query has been removed.
            int minLocalArticleIdInLastXHours = GetMinArticleIdCreatedLastXHoursInLocalStore(hoursSinceArticleCreated);

            // 0 means no article falls inside the window, so there is nothing to update.
            if (minLocalArticleIdInLastXHours == 0)
            {
                return;
            }

            List<int> articleIds;

            // Only ids already saved in the local store are refreshed, so every API call targets a known article.
            using (var context = new SlackerNewsEntities())
            {
                articleIds = context.articles
                                    .Where(t => t.hn_article_id >= minLocalArticleIdInLastXHours)
                                    .Select(t => (int)t.hn_article_id)
                                    .ToList();
            }

            var logger = NLog.LogManager.GetCurrentClassLogger();

            // Iterating an empty list is a no-op, so no separate emptiness check is needed.
            foreach (int articleId in articleIds)
            {
                try
                {
                    UpdateStatsForArticle(articleId);
                }
                catch (Exception ex)
                {
                    // Log and continue
                    logger.Error(ex);
                }
            }
        }