Exemple #1
0
        /// <summary>
        /// Writes every entry inside a single transaction, widening the running date range as it goes,
        /// then stores the date range and the supplied id before committing.
        /// </summary>
        /// <param name="entries">Entries to write; enumerated exactly once.</param>
        /// <param name="connection">Connection on which the transaction is opened.</param>
        /// <param name="maxId">Id handed to <c>LastWriteTable.Write</c>.</param>
        /// <param name="min">Earliest date seen so far; lowered when an entry is older.</param>
        /// <param name="max">Latest date seen so far; raised when an entry is newer.</param>
        private void WriteEntries(IEnumerable<Entry> entries, SqliteConnection connection, int maxId, ref DateTime min, ref DateTime max)
        {
            using var transaction = connection.BeginTransaction();

            foreach (var entry in entries)
            {
                StoryTable.Write(entry, connection, transaction);

                var entryDate = entry.Date;

                // The two checks are independent: a single entry may move both bounds.
                if (entryDate < min)
                {
                    min = entryDate;
                }

                if (entryDate > max)
                {
                    max = entryDate;
                }
            }

            // Bookkeeping rows go into the same transaction as the entries themselves.
            DateRangeTable.Write(min, max, connection, transaction);
            LastWriteTable.Write(maxId, connection, transaction);

            transaction.Commit();
        }
Exemple #2
0
        /// <summary>
        /// Returns the per-day entry counts across the stored date range. The result is built on the
        /// first call and served from the memory cache afterwards.
        /// </summary>
        /// <returns>The cached or freshly computed <see cref="PostCountsByDay"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the date range cannot be read from the database.
        /// </exception>
        public PostCountsByDay Get()
        {
            lock (Lock)
            {
                if (memoryCache.TryGetValue(nameof(PostCountsByDay), out PostCountsByDay cachedResult))
                {
                    return cachedResult;
                }

                if (!DateRangeTable.TryRead(connection, out var range))
                {
                    throw new InvalidOperationException("Empty date range table in SQLite database.");
                }

                var min = range.from;
                var max = range.to;

                var totalsByDay = new Dictionary<DateTime, ushort>();

                foreach (var entry in StoryTable.GetEntries(connection))
                {
                    var day = entry.Date.Date;

                    // One lookup instead of ContainsKey + indexer: a missing key leaves 'seen' at 0,
                    // so the first entry of a day stores 1. The cast keeps the same ushort
                    // arithmetic that '++' on the stored value had.
                    totalsByDay.TryGetValue(day, out var seen);
                    totalsByDay[day] = (ushort)(seen + 1);
                }

                var days   = new List<DateTime>();
                var counts = new List<ushort>();

                // Walk day by day from the first date up to, but not including, the date of 'max'.
                // NOTE(review): the upper bound is exclusive, so the calendar day of the latest entry
                // is left out of the result - confirm this is intended.
                for (var current = min.Date; current < max.Date; current = current.AddDays(1))
                {
                    days.Add(current);

                    // Days without entries fall through with the default count of 0.
                    totalsByDay.TryGetValue(current, out var count);
                    counts.Add(count);
                }

                cachedResult = new PostCountsByDay(min, max, counts, days);

                memoryCache.Set(nameof(PostCountsByDay), cachedResult);

                return cachedResult;
            }
        }
Exemple #3
0
        /// <summary>
        /// Handles the one-time migration of data from the binary file format to SQLite.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the target database file already exists.
        /// </exception>
        public static void Main(string[] args)
        {
            const string databaseLocation = @"C:\git\csharp\hn-reader\data";
            const string dataLocation     = @"C:\git\csharp\hn-reader";

            var dbName = Path.Combine(databaseLocation, "hn-data.sqlite");

            // Refuse to run against an existing database file.
            if (File.Exists(dbName))
            {
                throw new InvalidOperationException("Database already exists! " + dbName);
            }

            using (var connection = Connector.ConnectToFile(dbName))
            {
                // The command is disposable; release it as soon as the schema has been created.
                using (var command = new SQLiteCommand(Schema.Create, connection))
                {
                    command.ExecuteNonQuery();
                }

                VersionTable.Write(1, connection);

                var backfilledFiles = Directory.GetFiles(dataLocation, "*-complete.bin");

                var trackers = new DataTrackers();

                // All "*-complete.bin" files are written under one transaction.
                // NOTE(review): the transaction object is not passed to WriteFileIntoDatabase;
                // presumably its commands join the connection's active transaction - verify.
                using (var transaction = connection.BeginTransaction())
                {
                    foreach (var backfilledFile in backfilledFiles)
                    {
                        WriteFileIntoDatabase(backfilledFile, connection, trackers);
                        Console.WriteLine("Completed file: " + Path.GetFileName(backfilledFile));
                    }

                    transaction.Commit();
                }

                // "hn.bin" is written afterwards under its own transaction.
                using (var transaction = connection.BeginTransaction())
                {
                    WriteFileIntoDatabase(Path.Combine(dataLocation, "hn.bin"), connection, trackers);
                    transaction.Commit();
                }

                // Record the values accumulated in the trackers across all files.
                LastWriteTable.Write(trackers.MaxId, connection);
                DateRangeTable.Write(trackers.MinDate, trackers.MaxDate, connection);
            }
        }
Exemple #4
0
        /// <summary>
        /// Fetches the items between the last recorded id and the current maximum item id, and writes
        /// the collected entries to the database in batches.
        /// </summary>
        /// <param name="cancellationToken">
        /// Checked before the network call, before work starts and before each batch; when cancellation
        /// has been requested at one of these points the method returns early.
        /// </param>
        /// <returns>A task that completes when the run has finished or returned early.</returns>
        public async Task Run(CancellationToken cancellationToken = default(CancellationToken))
        {
            using var connection = connectionFactory.Open();

            // The return value is ignored; lastId keeps whatever TryRead assigns on failure.
            LastWriteTable.TryRead(connection, out var lastId);

            DateTime max;
            DateTime min;

            if (!DateRangeTable.TryRead(connection, out var range))
            {
                // No stored range yet: start inverted so the first written entry sets both bounds.
                max = DateTime.MinValue;
                min = DateTime.MaxValue;
            }
            else
            {
                max = range.to;
                min = range.from;
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return;
            }

            var maxItemResponse = await httpClient
                                  .GetStringAsync("https://hacker-news.firebaseio.com/v0/maxitem.json")
                                  .ConfigureAwait(false);

            // The response is machine-readable, so parse it independently of the current culture.
            var maxItem = int.Parse(maxItemResponse, System.Globalization.CultureInfo.InvariantCulture);

            if (lastId >= maxItem)
            {
                return;
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return;
            }

            var count = maxItem - Math.Max(0, lastId);

            Trace.WriteLine($"Running from {lastId} to {maxItem} ({count} items).");

            var entries = new ConcurrentBag<Entry>();

            // Number of ids covered by one round of maxThreads buckets.
            var interval = ThreadBucketSize * maxThreads;

            for (var i = lastId + 1; i <= maxItem; i += interval)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    return;
                }

                var tasks = new Task[maxThreads];
                for (var threadIndex = 0; threadIndex < maxThreads; threadIndex++)
                {
                    var threadStartId = i + (threadIndex * ThreadBucketSize);

                    // Clamp the end of the bucket so that no id beyond maxItem is requested.
                    var end = Math.Min(maxItem + 1, threadStartId + ThreadBucketSize);

                    tasks[threadIndex] = RunBucket(threadStartId, end, entries, cancellationToken);
                }

                await Task.WhenAll(tasks).ConfigureAwait(false);

                // Small batches are carried over to the next round instead of being flushed.
                // NOTE(review): if RunBucket can return early on cancellation, a flush here could
                // record ids that were never fetched - confirm RunBucket's behaviour.
                if (entries.Count > 10)
                {
                    Trace.WriteLine("Flushing to database.");

                    // The final round can extend past maxItem. Recording an id beyond maxItem would
                    // make the next run start after items that were never fetched, so clamp it.
                    var lastProcessedId = Math.Min(maxItem, i + interval - 1);

                    WriteEntries(entries.OrderBy(x => x.Id), connection, lastProcessedId, ref min, ref max);

                    entries.Clear();
                }
            }

            // Flush whatever is left over from rounds that stayed under the batch threshold.
            if (entries.Count > 0)
            {
                Trace.WriteLine("Flushing to database.");

                WriteEntries(entries.OrderBy(x => x.Id), connection, maxItem, ref min, ref max);
            }
        }