Esempio n. 1
0
        /// <summary>
        /// Writes every entry through <c>StoryTable</c> inside a single transaction, then records
        /// the resulting date range and <paramref name="maxId"/> in the same transaction and commits.
        /// </summary>
        /// <param name="entries">The entries to write, enumerated once in the order given.</param>
        /// <param name="connection">The connection the transaction is opened on.</param>
        /// <param name="maxId">The id passed to <c>LastWriteTable.Write</c>.</param>
        /// <param name="min">Running minimum date; lowered whenever an entry has an earlier date.</param>
        /// <param name="max">Running maximum date; raised whenever an entry has a later date.</param>
        /// <remarks>
        /// <paramref name="min"/> and <paramref name="max"/> are updated while the entries are being
        /// processed, i.e. before the transaction has been committed.
        /// </remarks>
        private void WriteEntries(IEnumerable <Entry> entries, SqliteConnection connection, int maxId, ref DateTime min, ref DateTime max)
        {
            // Disposed when the method exits, which is immediately after the commit below.
            using var tx = connection.BeginTransaction();

            foreach (var entry in entries)
            {
                StoryTable.Write(entry, connection, tx);

                var entryDate = entry.Date;

                if (max < entryDate)
                {
                    max = entryDate;
                }

                if (min > entryDate)
                {
                    min = entryDate;
                }
            }

            // The range and last-write marker go into the same transaction as the entries.
            DateRangeTable.Write(min, max, connection, tx);
            LastWriteTable.Write(maxId, connection, tx);

            tx.Commit();
        }
Esempio n. 2
0
        /// <summary>
        /// Handles the one-time migration of data from the binary file format to SQLite.
        /// </summary>
        /// <param name="args">Command line arguments; not used.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the target database file already exists, so an existing database is never
        /// migrated into a second time.
        /// </exception>
        /// <remarks>
        /// Steps: create the schema, write version 1, import every <c>*-complete.bin</c> file found
        /// in the data directory, import <c>hn.bin</c>, and finally record the max id and date range
        /// accumulated in the <c>DataTrackers</c> instance.
        /// </remarks>
        public static void Main(string[] args)
        {
            const string databaseLocation = @"C:\git\csharp\hn-reader\data";
            const string dataLocation     = @"C:\git\csharp\hn-reader";

            var dbName = Path.Combine(databaseLocation, "hn-data.sqlite");

            if (File.Exists(dbName))
            {
                throw new InvalidOperationException("Database already exists! " + dbName);
            }

            using (var connection = Connector.ConnectToFile(dbName))
            {
                // The command is disposable; release it as soon as the schema has been created.
                using (var command = new SQLiteCommand(Schema.Create, connection))
                {
                    command.ExecuteNonQuery();
                }

                VersionTable.Write(1, connection);

                var backfilledFiles = Directory.GetFiles(dataLocation, "*-complete.bin");

                var trackers = new DataTrackers();

                // All backfilled files are imported while one transaction is open on the connection.
                using (var transaction = connection.BeginTransaction())
                {
                    foreach (var backfilledFile in backfilledFiles)
                    {
                        WriteFileIntoDatabase(backfilledFile, connection, trackers);
                        Console.WriteLine("Completed file: " + Path.GetFileName(backfilledFile));
                    }

                    transaction.Commit();
                }

                // The hn.bin file is imported under its own transaction, after the backfilled files.
                using (var transaction = connection.BeginTransaction())
                {
                    WriteFileIntoDatabase(Path.Combine(dataLocation, "hn.bin"), connection, trackers);
                    transaction.Commit();
                }

                // trackers was passed to every import above; persist the values it now holds.
                LastWriteTable.Write(trackers.MaxId, connection);
                DateRangeTable.Write(trackers.MinDate, trackers.MaxDate, connection);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Processes every item id between the last id recorded in <c>LastWriteTable</c> and the
        /// current maximum item id reported by the Hacker News API, flushing collected entries to
        /// the database in batches.
        /// </summary>
        /// <param name="cancellationToken">
        /// Checked before the HTTP request, before processing starts and at the top of every batch;
        /// when cancellation is requested the method returns without throwing. Entries collected
        /// but not yet flushed at that point are not written.
        /// </param>
        /// <returns>A task that completes when all batches have been processed or the run was cancelled.</returns>
        public async Task Run(CancellationToken cancellationToken = default(CancellationToken))
        {
            using var connection = connectionFactory.Open();

            // The result is deliberately ignored; lastId is used as-is when nothing could be read.
            LastWriteTable.TryRead(connection, out var lastId);

            DateTime max;
            DateTime min;

            if (!DateRangeTable.TryRead(connection, out var range))
            {
                // No stored range: start with an inverted range so the first entry sets both ends.
                max = DateTime.MinValue;
                min = DateTime.MaxValue;
            }
            else
            {
                max = range.to;
                min = range.from;
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return;
            }

            var maxItemResponse = await httpClient
                                  .GetStringAsync("https://hacker-news.firebaseio.com/v0/maxitem.json")
                                  .ConfigureAwait(false);

            var maxItem = int.Parse(maxItemResponse);

            if (lastId >= maxItem)
            {
                // Nothing new since the last run.
                return;
            }

            if (cancellationToken.IsCancellationRequested)
            {
                return;
            }

            var count = maxItem - Math.Max(0, lastId);

            Trace.WriteLine($"Running from {lastId} to {maxItem} ({count} items).");

            var entries = new ConcurrentBag <Entry>();

            // Number of ids covered by one iteration of the outer loop.
            var interval = (ThreadBucketSize * maxThreads);

            for (var i = lastId + 1; i <= maxItem; i += interval)
            {
                if (cancellationToken.IsCancellationRequested)
                {
                    return;
                }

                var tasks = new Task[maxThreads];
                for (var threadIndex = 0; threadIndex < maxThreads; threadIndex++)
                {
                    var offset = threadIndex * ThreadBucketSize;

                    var threadStartId = i + offset;

                    // Exclusive upper bound, clamped so no bucket runs past maxItem.
                    var end = Math.Min(maxItem + 1, threadStartId + ThreadBucketSize);

                    // NOTE(review): RunBucket is expected to add its results to the shared bag - confirm.
                    var task = RunBucket(threadStartId, end, entries, cancellationToken);

                    tasks[threadIndex] = task;
                }

                await Task.WhenAll(tasks).ConfigureAwait(false);

                if (entries.Count > 10)
                {
                    Trace.WriteLine("Flushing to database.");

                    // The buckets above never go past maxItem, so the recorded id must not either.
                    // Recording i + interval - 1 unclamped on the final batch would make the next
                    // run start after ids that were never processed.
                    var lastIdInBatch = Math.Min(maxItem, i + interval - 1);

                    WriteEntries(entries.OrderBy(x => x.Id), connection, lastIdInBatch, ref min, ref max);

                    entries.Clear();
                }
            }

            // Flush whatever is left from batches too small to trigger the in-loop flush.
            if (entries.Count > 0)
            {
                Trace.WriteLine("Flushing to database.");

                WriteEntries(entries.OrderBy(x => x.Id), connection, maxItem, ref min, ref max);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Adds the entries returned by <c>StoryTable.GetEntries</c> for the last indexed id to
        /// <paramref name="indexWriter"/> and records the new last indexed id in <c>index.bin</c>.
        /// </summary>
        /// <param name="indexDirectory">Directory holding the <c>index.lock</c> and <c>index.bin</c> files.</param>
        /// <param name="connection">Connection used to read the last written id and the entries.</param>
        /// <param name="indexWriter">Writer that receives one document per entry.</param>
        /// <remarks>
        /// Returns without doing anything when the lock file already exists, when
        /// <c>RequiresUpdate</c> reports that the existing index file needs no update, or when the
        /// last written id cannot be read or is not greater than the last indexed id.
        /// </remarks>
        public static void Index(string indexDirectory, SQLiteConnection connection, IndexWriter indexWriter)
        {
            Guard.CheckDirectoryValid(indexDirectory, nameof(indexDirectory), false);

            var lockFile = Path.Combine(indexDirectory, "index.lock");

            // The lock file exists only while an indexing pass is writing (see try/finally below).
            if (File.Exists(lockFile))
            {
                return;
            }

            lock (Lock)
            {
                var indexFile = Path.Combine(indexDirectory, "index.bin");

                // int.MinValue is used when there is no index file yet.
                var lastIndexedId = int.MinValue;

                var firstRun = !File.Exists(indexFile);

                if (!firstRun)
                {
                    if (RequiresUpdate(indexFile, out var actualLastIndexed))
                    {
                        lastIndexedId = actualLastIndexed;
                    }
                    else
                    {
                        return;
                    }
                }

                if (!LastWriteTable.TryRead(connection, out var lastWriteId) || lastIndexedId >= lastWriteId)
                {
                    Trace.WriteLine($"No indexing required, last indexed {lastIndexedId} which is equal to (or greater than) {lastWriteId}.");
                    return;
                }

                // Report the actual range: from the last indexed id up to the last written id.
                Trace.WriteLine($"Indexing from {lastIndexedId} to {lastWriteId}.");

                try
                {
                    // Create the empty lock file; it is removed in the finally block.
                    File.WriteAllBytes(lockFile, Array.Empty <byte>());

                    var count = 1;
                    foreach (var entry in StoryTable.GetEntries(connection, lastIndexedId))
                    {
                        var document = entry.ToDocument();

                        indexWriter.AddDocument(document);

                        // Progress message every 1000 documents.
                        if (count % 1000 == 0)
                        {
                            Trace.WriteLine($"Finished indexing #{count}.");
                        }

                        count++;
                    }

                    Trace.WriteLine($"Index complete, setting last indexed id to {lastWriteId}.");

                    // Written only after every entry was added, so a failed pass leaves the old id in place.
                    File.WriteAllText(indexFile, lastWriteId.ToString(CultureInfo.InvariantCulture));
                }
                finally
                {
                    File.Delete(lockFile);
                }
            }
        }