예제 #1
0
        /// <summary>
        /// Runs <c>PhantomReadability.Get</c> on <paramref name="url"/> and returns the outcome
        /// as a <c>text/html</c> response: the extracted content when no error was reported,
        /// otherwise the error's message text.
        /// </summary>
        /// <param name="url">Address of the article, taken from the query string.</param>
        /// <param name="whitelist">Passed through unchanged to <c>PhantomReadability.Get</c>.</param>
        /// <param name="blacklist">Passed through unchanged to <c>PhantomReadability.Get</c>.</param>
        /// <returns>A <see cref="ContentResult"/> whose body is either the content or the error message.</returns>
        public ContentResult GetArticle([FromQuery] string url, [FromQuery] string whitelist, [FromQuery] string blacklist)
        {
            var readability = PhantomReadability.Get(_env.ContentRootPath, url, whitelist, blacklist);
            var response    = new ContentResult { ContentType = "text/html" };

            // failure path first: the message is served in place of the article body
            if (readability.error != null)
            {
                response.Content = readability.error.message;
                return response;
            }

            response.Content = readability.content;
            return response;
        }
예제 #2
0
        /// <summary>
        /// Replaces the contents of each article with a readable rendering. A row in the
        /// <c>Articles</c> cache table is used when one exists for the feed/article URL pair;
        /// otherwise <c>PhantomReadability.Get</c> is called on the article URL and its result
        /// is written to the cache. Finally, cache rows for this feed whose article URL is not
        /// among <paramref name="articles"/> are deleted.
        /// </summary>
        /// <param name="feedUrl">Feed URL used as the cache key together with the article URL.</param>
        /// <param name="whitelist">Passed through unchanged to <c>PhantomReadability.Get</c>.</param>
        /// <param name="blacklist">Passed through unchanged to <c>PhantomReadability.Get</c>.</param>
        /// <param name="articles">Articles to transform; their <c>ReplaceContents</c> nodes are modified in place.</param>
        /// <param name="maxFeedEntriesProcessed">
        /// Number of uncached articles that may be fetched in one call. When a further uncached
        /// article is encountered the loop stops, and the remaining articles are left untouched.
        /// </param>
        private static void TransformArticles(
            string feedUrl,
            string whitelist, string blacklist,
            IEnumerable <ArticleInfo> articles,
            int maxFeedEntriesProcessed)
        {
            // Materialize once: the sequence is walked three times below (main loop, parameter
            // names, parameter values) and a deferred query could yield different results each time.
            var articleList = articles.ToList();

            using (var conn = SqliteUtil.GetConn())
            {
                conn.Open();
                int articleCount = 0;

                // update the feed contents (only a limited number of uncached items are fetched)
                foreach (var article in articleList)
                {
                    string readableContent = "";
                    bool   foundInCache    = false;

                    // check cache first; the reader is closed before any write happens on this connection
                    using (var readCache = new SqliteCommand("SELECT * FROM Articles WHERE FeedUrl = @feedurl AND ArticleUrl = @articleurl", conn))
                    {
                        readCache.Parameters.AddWithValue("@feedurl", feedUrl);
                        readCache.Parameters.AddWithValue("@articleurl", article.ArticleUrl);
                        using (var reader = readCache.ExecuteReader())
                        {
                            if (reader.Read())
                            {
                                foundInCache    = true;
                                readableContent = reader["Content"].ToString();

                                // Unreadable articles are cached with empty content. Only tag real
                                // content, otherwise the marker alone would make the string non-empty
                                // and the article's original contents would be replaced by a bare comment.
                                if (readableContent != "")
                                {
                                    readableContent += $"<!-- cached at {reader["LastFetchedUTC"]} -->";
                                }
                            }
                        }
                    }

                    if (!foundInCache)
                    {
                        // stop once the per-call budget of uncached fetches is used up
                        if (articleCount++ >= maxFeedEntriesProcessed)
                        {
                            break;
                        }

                        // if not cached, make it readable!
                        var pr = PhantomReadability.Get(_env.ContentRootPath, article.ArticleUrl, whitelist, blacklist);
                        if (pr.error != null)
                        {
                            switch (pr.error.message)
                            {
                            case "Empty result from Readability.js.":
                                // if readability can't handle it, cache the original html
                                readableContent = article.ReplaceContents.Value;
                                // reset the error so the result is cached below
                                pr.error = null;
                                break;

                            default:
                                // include the error message in CDATA comment
                                article.ReplaceContents.Add(new XCData(pr.error.message));
                                break;
                            }
                        }
                        else if (pr.isProbablyReaderable)
                        {
                            readableContent = pr.content;
                        }

                        // cache the result (even if not able to be made readable);
                        // results that still carry an error are not cached
                        if (pr.error == null)
                        {
                            using (var writeCache = new SqliteCommand(@"
									INSERT INTO Articles (FeedUrl, ArticleUrl, LastFetchedUTC, Content, Readable)
									VALUES (@feedurl, @articleurl, @lastfetchedutc, @content, @readable)", conn))
                            {
                                writeCache.Parameters.AddWithValue("@feedurl", feedUrl);
                                writeCache.Parameters.AddWithValue("@articleurl", article.ArticleUrl);
                                writeCache.Parameters.AddWithValue("@lastfetchedutc", DateTime.UtcNow);
                                writeCache.Parameters.AddWithValue("@content", readableContent);
                                writeCache.Parameters.AddWithValue("@readable", readableContent != "");
                                writeCache.ExecuteNonQuery();
                            }
                        }
                    }

                    // if the article is readable, replace the contents
                    if (readableContent != "")
                    {
                        readableContent += $"<!-- processed at {DateTime.UtcNow} -->";
                        article.ReplaceContents.ReplaceNodes(new XCData(readableContent));
                    }
                }

                // clean up the cache: drop rows for this feed whose article is no longer listed
                var parameters = string.Join(",", articleList.Select((a, i) => "@a" + i));
                using (var cleanupCache = new SqliteCommand("", conn))
                {
                    cleanupCache.Parameters.AddRange(articleList.Select((a, i) => new SqliteParameter("@a" + i, a.ArticleUrl)));
                    cleanupCache.Parameters.AddWithValue("@feedurl", feedUrl);
                    cleanupCache.CommandText = $"DELETE FROM Articles WHERE FeedUrl = @feedurl AND ArticleUrl NOT IN ({parameters})";
                    cleanupCache.ExecuteNonQuery();
                }
            }
        }