/// <summary>
/// Writes a batch of entries to the database inside a single transaction, then records
/// the updated date range and the last written id before committing.
/// </summary>
/// <param name="entries">The entries to store; each one is passed to <c>StoryTable.Write</c>.</param>
/// <param name="connection">The open connection the transaction is started on.</param>
/// <param name="maxId">The id passed to <c>LastWriteTable.Write</c> for this batch.</param>
/// <param name="min">Running earliest date; lowered when an entry has an earlier date.</param>
/// <param name="max">Running latest date; raised when an entry has a later date.</param>
private void WriteEntries(IEnumerable<Entry> entries, SqliteConnection connection, int maxId, ref DateTime min, ref DateTime max)
{
    using (var transaction = connection.BeginTransaction())
    {
        foreach (var entry in entries)
        {
            StoryTable.Write(entry, connection, transaction);

            // Widen the running [min, max] window so it covers this entry's date.
            var date = entry.Date;
            if (date > max)
            {
                max = date;
            }

            if (date < min)
            {
                min = date;
            }
        }

        // The bookkeeping rows share the transaction with the entries themselves.
        DateRangeTable.Write(min, max, connection, transaction);
        LastWriteTable.Write(maxId, connection, transaction);

        transaction.Commit();
    }
}
/// <summary>
/// Handles the one-time migration of data from the binary file format to SQLite.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the target database file already exists, so an existing database is never overwritten.
/// </exception>
public static void Main(string[] args)
{
    const string databaseLocation = @"C:\git\csharp\hn-reader\data";
    const string dataLocation = @"C:\git\csharp\hn-reader";

    var dbName = Path.Combine(databaseLocation, "hn-data.sqlite");

    if (File.Exists(dbName))
    {
        throw new InvalidOperationException("Database already exists! " + dbName);
    }

    using (var connection = Connector.ConnectToFile(dbName))
    {
        // The command is disposable; release it as soon as the schema has been created.
        using (var command = new SQLiteCommand(Schema.Create, connection))
        {
            command.ExecuteNonQuery();
        }

        VersionTable.Write(1, connection);

        var backfilledFiles = Directory.GetFiles(dataLocation, "*-complete.bin");

        // Shared across every file so the final id and date range cover all imported data.
        var trackers = new DataTrackers();

        // All backfilled files are imported within one transaction.
        using (var transaction = connection.BeginTransaction())
        {
            foreach (var backfilledFile in backfilledFiles)
            {
                WriteFileIntoDatabase(backfilledFile, connection, trackers);
                Console.WriteLine("Completed file: " + Path.GetFileName(backfilledFile));
            }

            transaction.Commit();
        }

        // "hn.bin" is imported after the "*-complete.bin" files, in a transaction of its own.
        using (var transaction = connection.BeginTransaction())
        {
            WriteFileIntoDatabase(Path.Combine(dataLocation, "hn.bin"), connection, trackers);
            transaction.Commit();
        }

        LastWriteTable.Write(trackers.MaxId, connection);
        DateRangeTable.Write(trackers.MinDate, trackers.MaxDate, connection);
    }
}
/// <summary>
/// Downloads every Hacker News item with an id greater than the last id recorded in the database,
/// up to the current maximum item id reported by the API, and writes the results to the database in batches.
/// </summary>
/// <param name="cancellationToken">
/// Checked before the network request and before each batch; when cancellation is requested
/// the method returns without flushing entries that have not been written yet.
/// </param>
/// <returns>A task that completes when all batches are processed or cancellation is observed.</returns>
public async Task Run(CancellationToken cancellationToken = default(CancellationToken))
{
    using var connection = connectionFactory.Open();

    // The result is ignored on purpose: when nothing is stored, lastId keeps its default value.
    LastWriteTable.TryRead(connection, out var lastId);

    DateTime max;
    DateTime min;
    if (!DateRangeTable.TryRead(connection, out var range))
    {
        // No stored range yet: start with an inverted window so the first entry sets both bounds.
        max = DateTime.MinValue;
        min = DateTime.MaxValue;
    }
    else
    {
        max = range.to;
        min = range.from;
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return;
    }

    var maxItemResponse = await httpClient
        .GetStringAsync("https://hacker-news.firebaseio.com/v0/maxitem.json")
        .ConfigureAwait(false);

    var maxItem = int.Parse(maxItemResponse);

    if (lastId >= maxItem)
    {
        // Already up to date.
        return;
    }

    if (cancellationToken.IsCancellationRequested)
    {
        return;
    }

    var count = maxItem - Math.Max(0, lastId);

    Trace.WriteLine($"Running from {lastId} to {maxItem} ({count} items).");

    var entries = new ConcurrentBag<Entry>();

    // Number of ids covered by one round of parallel buckets.
    var interval = ThreadBucketSize * maxThreads;

    for (var i = lastId + 1; i <= maxItem; i += interval)
    {
        if (cancellationToken.IsCancellationRequested)
        {
            return;
        }

        var tasks = new Task[maxThreads];

        for (var threadIndex = 0; threadIndex < maxThreads; threadIndex++)
        {
            var offset = threadIndex * ThreadBucketSize;
            var threadStartId = i + offset;

            // Never ask for ids beyond maxItem.
            // NOTE(review): the "+ 1" suggests RunBucket treats `end` as exclusive - confirm.
            var end = Math.Min(maxItem + 1, threadStartId + ThreadBucketSize);

            tasks[threadIndex] = RunBucket(threadStartId, end, entries, cancellationToken);
        }

        await Task.WhenAll(tasks).ConfigureAwait(false);

        if (entries.Count > 10)
        {
            Trace.WriteLine("Flushing to database.");

            // The final round may extend past maxItem. Clamp the recorded id so the next
            // run does not start beyond ids that did not exist yet and skip them forever.
            var lastProcessedId = Math.Min(maxItem, i + interval - 1);

            WriteEntries(entries.OrderBy(x => x.Id), connection, lastProcessedId, ref min, ref max);
            entries.Clear();
        }
    }

    // Write whatever is left over from rounds that did not reach the flush threshold.
    if (entries.Count > 0)
    {
        Trace.WriteLine("Flushing to database.");
        WriteEntries(entries.OrderBy(x => x.Id), connection, maxItem, ref min, ref max);
    }
}
/// <summary>
/// Adds entries from the database to the search index and records the last indexed id
/// in an "index.bin" file inside <paramref name="indexDirectory"/>.
/// </summary>
/// <param name="indexDirectory">Directory holding the "index.bin" marker and the "index.lock" file.</param>
/// <param name="connection">Open connection used to read the last written id and the entries.</param>
/// <param name="indexWriter">Writer that receives one document per entry.</param>
public static void Index(string indexDirectory, SQLiteConnection connection, IndexWriter indexWriter)
{
    Guard.CheckDirectoryValid(indexDirectory, nameof(indexDirectory), false);

    var lockFile = Path.Combine(indexDirectory, "index.lock");

    // An existing lock file means an indexing run is in progress (or one was interrupted); skip.
    if (File.Exists(lockFile))
    {
        return;
    }

    lock (Lock)
    {
        var indexFile = Path.Combine(indexDirectory, "index.bin");

        // On the first run there is no marker file, so start from the lowest possible id.
        var lastIndexedId = int.MinValue;
        var firstRun = !File.Exists(indexFile);

        if (!firstRun)
        {
            // NOTE(review): RequiresUpdate presumably reads the last indexed id from the marker file - confirm.
            if (RequiresUpdate(indexFile, out var actualLastIndexed))
            {
                lastIndexedId = actualLastIndexed;
            }
            else
            {
                return;
            }
        }

        if (!LastWriteTable.TryRead(connection, out var lastWriteId) || lastIndexedId >= lastWriteId)
        {
            Trace.WriteLine($"No indexing required, last indexed {lastIndexedId} which is equal to (or greater than) {lastWriteId}.");
            return;
        }

        // Report the real start of the range (previously lastWriteId was printed twice).
        Trace.WriteLine($"Indexing from {lastIndexedId} to {lastWriteId}.");

        try
        {
            // Create the empty lock file for the duration of the run.
            File.WriteAllBytes(lockFile, Array.Empty<byte>());

            var count = 1;
            foreach (var entry in StoryTable.GetEntries(connection, lastIndexedId))
            {
                var document = entry.ToDocument();

                indexWriter.AddDocument(document);

                // Progress output every 1000 documents.
                if (count % 1000 == 0)
                {
                    Trace.WriteLine($"Finished indexing #{count}.");
                }

                count++;
            }

            Trace.WriteLine($"Index complete, setting last indexed id to {lastWriteId}.");

            File.WriteAllText(indexFile, lastWriteId.ToString(CultureInfo.InvariantCulture));
        }
        finally
        {
            // Always remove the lock file, even when indexing fails.
            File.Delete(lockFile);
        }
    }
}