/// <summary>
/// Writes a batch of entries to the database inside a single transaction, then records
/// the updated date range and the last written id in that same transaction.
/// </summary>
/// <param name="entries">The entries to write; enumerated exactly once.</param>
/// <param name="connection">The open connection the transaction is started on.</param>
/// <param name="maxId">The id passed to <c>LastWriteTable.Write</c> for this batch.</param>
/// <param name="min">Earliest date seen so far; lowered when an entry has an earlier date.</param>
/// <param name="max">Latest date seen so far; raised when an entry has a later date.</param>
private void WriteEntries(IEnumerable<Entry> entries, SqliteConnection connection, int maxId, ref DateTime min, ref DateTime max)
{
    using var tx = connection.BeginTransaction();

    foreach (var entry in entries)
    {
        StoryTable.Write(entry, connection, tx);

        // Widen the running [min, max] range so it covers this entry's date.
        var posted = entry.Date;
        if (posted > max)
        {
            max = posted;
        }

        if (posted < min)
        {
            min = posted;
        }
    }

    // Bookkeeping rows are written with the same transaction object as the entries.
    DateRangeTable.Write(min, max, connection, tx);
    LastWriteTable.Write(maxId, connection, tx);

    tx.Commit();
}
/// <summary>
/// Returns the number of entries per calendar day across the stored date range.
/// The result is served from the memory cache when present; otherwise it is built
/// from the entries returned by <c>StoryTable.GetEntries</c> and then cached.
/// </summary>
/// <returns>The cached or newly built <see cref="PostCountsByDay"/>.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>DateRangeTable.TryRead</c> reports that no date range could be read.
/// </exception>
public PostCountsByDay Get()
{
    lock (Lock)
    {
        if (memoryCache.TryGetValue(nameof(PostCountsByDay), out PostCountsByDay cachedResult))
        {
            return cachedResult;
        }

        if (!DateRangeTable.TryRead(connection, out var range))
        {
            throw new InvalidOperationException("Empty date range table in SQLite database.");
        }

        var min = range.from;
        var max = range.to;

        // Tally entries by calendar day (time-of-day stripped).
        var totalsByDay = new Dictionary<DateTime, ushort>();
        foreach (var entry in StoryTable.GetEntries(connection))
        {
            var day = entry.Date.Date;

            // On a miss TryGetValue leaves 'seen' at 0, so one lookup covers both the
            // first-sighting and the increment case.
            totalsByDay.TryGetValue(day, out var seen);
            totalsByDay[day] = (ushort)(seen + 1);
        }

        // Emit one slot per day, filling days without entries with zero.
        var days = new List<DateTime>();
        var counts = new List<ushort>();

        var current = min;

        // NOTE(review): the upper bound is exclusive, so max's own day gets no slot
        // (and a single-day range yields empty lists) — confirm this is intended.
        while (current.Date < max.Date)
        {
            days.Add(current.Date);

            // 'count' is 0 when the day has no entries.
            totalsByDay.TryGetValue(current.Date, out var count);
            counts.Add(count);

            current = current.AddDays(1);
        }

        cachedResult = new PostCountsByDay(min, max, counts, days);
        memoryCache.Set(nameof(PostCountsByDay), cachedResult);

        return cachedResult;
    }
}
/// <summary>
/// Handles the one-time migration of data from the binary file format to SQLite.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the target database file already exists.
/// </exception>
public static void Main(string[] args)
{
    const string databaseLocation = @"C:\git\csharp\hn-reader\data";
    const string dataLocation = @"C:\git\csharp\hn-reader";

    var dbName = Path.Combine(databaseLocation, "hn-data.sqlite");

    // Refuse to run against an existing database file.
    if (File.Exists(dbName))
    {
        throw new InvalidOperationException("Database already exists! " + dbName);
    }

    using (var connection = Connector.ConnectToFile(dbName))
    {
        // Dispose the schema command as soon as it has run instead of leaving it
        // for the finalizer.
        using (var command = new SQLiteCommand(Schema.Create, connection))
        {
            command.ExecuteNonQuery();
        }

        VersionTable.Write(1, connection);

        var backfilledFiles = Directory.GetFiles(dataLocation, "*-complete.bin");

        // Shared across all files; MaxId/MinDate/MaxDate are read from it at the end.
        var trackers = new DataTrackers();

        // All "*-complete.bin" files are written while one transaction is open.
        using (var transaction = connection.BeginTransaction())
        {
            foreach (var backfilledFile in backfilledFiles)
            {
                WriteFileIntoDatabase(backfilledFile, connection, trackers);

                Console.WriteLine("Completed file: " + Path.GetFileName(backfilledFile));
            }

            transaction.Commit();
        }

        // "hn.bin" is written afterwards under its own transaction.
        using (var transaction = connection.BeginTransaction())
        {
            WriteFileIntoDatabase(Path.Combine(dataLocation, "hn.bin"), connection, trackers);

            transaction.Commit();
        }

        LastWriteTable.Write(trackers.MaxId, connection);
        DateRangeTable.Write(trackers.MinDate, trackers.MaxDate, connection);
    }
}
/// <summary>
/// Fetches the ids between the last recorded id and the current maximum item id reported
/// by the Hacker News API, in windows of <c>ThreadBucketSize * maxThreads</c> ids, and
/// flushes the collected entries to the database in batches.
/// </summary>
/// <param name="cancellationToken">
/// Checked before the network call and before each window; when cancellation is requested
/// the method returns without flushing entries still held in memory.
/// </param>
/// <returns>A task that completes when the run finishes or is cancelled.</returns>
public async Task Run(CancellationToken cancellationToken = default(CancellationToken))
{
    // Collected entries are flushed once more than this many have accumulated.
    const int flushThreshold = 10;

    using var connection = connectionFactory.Open();

    // The result is deliberately ignored: lastId is used as whatever TryRead assigned.
    LastWriteTable.TryRead(connection, out var lastId);

    DateTime max;
    DateTime min;

    if (!DateRangeTable.TryRead(connection, out var range))
    {
        // No stored range: start from sentinels so the first entry sets both bounds.
        max = DateTime.MinValue;
        min = DateTime.MaxValue;
    }
    else
    {
        max = range.to;
        min = range.from;
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return;
    }

    var maxItemResponse = await httpClient
        .GetStringAsync("https://hacker-news.firebaseio.com/v0/maxitem.json")
        .ConfigureAwait(false);

    // Machine-readable payload: parse independently of the current culture.
    var maxItem = int.Parse(maxItemResponse, System.Globalization.CultureInfo.InvariantCulture);

    if (lastId >= maxItem)
    {
        return;
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return;
    }

    var count = maxItem - Math.Max(0, lastId);

    Trace.WriteLine($"Running from {lastId} to {maxItem} ({count} items).");

    var entries = new ConcurrentBag<Entry>();

    // Number of ids covered by one pass of the outer loop.
    var interval = ThreadBucketSize * maxThreads;

    for (var i = lastId + 1; i <= maxItem; i += interval)
    {
        if (cancellationToken.IsCancellationRequested)
        {
            return;
        }

        // Split the window into maxThreads consecutive buckets, one task each.
        var tasks = new Task[maxThreads];
        for (var threadIndex = 0; threadIndex < maxThreads; threadIndex++)
        {
            var offset = threadIndex * ThreadBucketSize;
            var threadStartId = i + offset;

            // Never let a bucket's end run past maxItem + 1.
            var end = Math.Min(maxItem + 1, threadStartId + ThreadBucketSize);

            tasks[threadIndex] = RunBucket(threadStartId, end, entries, cancellationToken);
        }

        await Task.WhenAll(tasks).ConfigureAwait(false);

        if (entries.Count > flushThreshold)
        {
            Trace.WriteLine("Flushing to database.");

            // The final window can extend past maxItem. Clamp the recorded id so the
            // stored last-write id never exceeds what was actually requested; otherwise
            // a later run would start after ids that were never fetched.
            var lastIdInWindow = Math.Min(maxItem, i + interval - 1);

            WriteEntries(entries.OrderBy(x => x.Id), connection, lastIdInWindow, ref min, ref max);
            entries.Clear();
        }
    }

    // Flush whatever stayed below the threshold during the loop.
    if (entries.Count > 0)
    {
        Trace.WriteLine("Flushing to database.");
        WriteEntries(entries.OrderBy(x => x.Id), connection, maxItem, ref min, ref max);
    }
}