/// <summary>
///  Opens a BION file together with its companion files so it can be searched.
/// </summary>
/// <remarks>
///  The companion files are located by swapping the extension of
///  <paramref name="bionFilePath"/> for ".wdx" (word dictionary), ".cdx"
///  (container index) and ".idx" (search index). If any of them fails to open,
///  everything opened so far is released before the exception propagates.
/// </remarks>
/// <param name="bionFilePath">Path of the BION file to open.</param>
/// <param name="runDepth">Stored unchanged in <c>_runDepth</c>; not interpreted by the constructor.</param>
public BionSearcher(string bionFilePath, int runDepth)
{
    try
    {
        _compressor = Memory.Log("Dictionary", () => WordCompressor.OpenRead(Path.ChangeExtension(bionFilePath, ".wdx")));
        _containerIndex = Memory.Log("ContainerIndex", () => ContainerIndex.OpenRead(Path.ChangeExtension(bionFilePath, ".cdx")));
        _searchIndexReader = Memory.Log("SearchIndex", () => new SearchIndexReader(Path.ChangeExtension(bionFilePath, ".idx")));

        _bionReader = Memory.Log("BionReader", () =>
        {
            FileStream stream = File.OpenRead(bionFilePath);

            try
            {
                return new BionReader(stream, containerIndex: _containerIndex, compressor: _compressor);
            }
            catch
            {
                // No reader exists yet to own the stream, so close it here.
                stream.Dispose();
                throw;
            }
        });
    }
    catch
    {
        // The caller never receives an instance it could dispose, so release
        // whatever was opened before the failure. Dispose() skips null fields.
        Dispose();
        throw;
    }

    _runDepth = runDepth;

    // Fixed-size buffer of 256 positions.
    _termPositions = new long[256];
}
/// <summary>
///  Releases the word compressor, search index reader, container index and
///  BION reader, in that order, and clears each field afterwards.
/// </summary>
/// <remarks>
///  Fields that are already null are skipped, so calling this more than once
///  (or on a partially initialized instance) is harmless.
/// </remarks>
public void Dispose()
{
    if (_compressor != null)
    {
        _compressor.Dispose();
        _compressor = null;
    }

    if (_searchIndexReader != null)
    {
        _searchIndexReader.Dispose();
        _searchIndexReader = null;
    }

    if (_containerIndex != null)
    {
        _containerIndex.Dispose();
        _containerIndex = null;
    }

    if (_bionReader != null)
    {
        _bionReader.Dispose();
        _bionReader = null;
    }
}
/// <summary>
///  Writes a search index in which each of 100 words occurs 10 times at evenly
///  spaced positions, then reads it back and checks every word's matches.
/// </summary>
/// <param name="requireMerge">
///  Selects the smaller index buffer size (500 instead of 1000).
///  NOTE(review): presumably small enough to force the writer to merge - confirm against SearchIndexWriter.
/// </param>
public void SearchIndex_RoundTrip(bool requireMerge)
{
    const int wordCount = 100;
    const int occurrences = 10;
    const int increment = 16;
    const string path = "SearchIndex.idx";

    int indexBufferSize;
    if (requireMerge)
    {
        indexBufferSize = 500;
    }
    else
    {
        indexBufferSize = 1000;
    }

    // Write phase: entry N belongs to word (N mod wordCount) and sits at position N * increment,
    // i.e. each pass over the words appends one more occurrence of every word.
    using (SearchIndexWriter writer = new SearchIndexWriter(path, 100, indexBufferSize))
    {
        int totalEntries = occurrences * wordCount;

        for (int entry = 0; entry < totalEntries; ++entry)
        {
            writer.Add(entry % wordCount, entry * increment);
        }
    }

    // Read phase: the writer has been disposed above, so the file is complete.
    using (SearchIndexReader reader = new SearchIndexReader(path))
    {
        long[] found = new long[occurrences];

        for (int word = 0; word < wordCount; ++word)
        {
            // Page through all matches for this word
            int foundCount = 0;
            SearchResult result = reader.Find(word);

            while (!result.Done)
            {
                foundCount += result.Page(ref found);
            }

            // Every word was written exactly 'occurrences' times
            Assert.AreEqual(occurrences, foundCount);

            // Each match must be at the position it was written to
            for (int nth = 0; nth < occurrences; ++nth)
            {
                long expected = increment * ((nth * wordCount) + word);
                Assert.AreEqual(expected, found[nth]);
            }
        }
    }
}
/// <summary>
///  Re-encodes every word's position list from a search index into an integer
///  block file, and reports entry count, output size and bits per position.
/// </summary>
/// <param name="filePath">Search index file to read.</param>
/// <param name="outPath">File the integer blocks are written to.</param>
/// <param name="absolute">
///  True to write each position unchanged; false to write the difference from
///  the previous position of the same word (the first one relative to zero).
/// </param>
public static void TranslateSearchIndex(string filePath, string outPath, bool absolute)
{
    long[] page = new long[BlockSize];
    long entryTotal = 0;
    long outputBytes = 0;

    // The summary lambda runs later, so it sees the final totals captured below.
    using (new ConsoleWatch(
        $"Translating \"{filePath}\" to block \"{outPath}\"...",
        () => $"{entryTotal:n0} entries, written to {outputBytes:n0} bytes ({((float)(8 * outputBytes) / (float)(entryTotal)):n2} bits per position)"))
    {
        using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(outPath)))
        using (SearchIndexReader reader = new SearchIndexReader(filePath))
        {
            for (int wordIndex = 0; wordIndex < reader.WordCount; ++wordIndex)
            {
                // Deltas restart from zero for every word.
                int previous = 0;
                SearchResult matches = reader.Find(wordIndex);

                while (!matches.Done)
                {
                    int pageLength = matches.Page(ref page);

                    for (int k = 0; k < pageLength; ++k)
                    {
                        // NOTE(review): positions are narrowed from long to int here - confirm they always fit.
                        int position = (int)page[k];

                        int toWrite = position;
                        if (!absolute)
                        {
                            toWrite = position - previous;
                        }

                        writer.Write(toWrite);
                        previous = position;
                    }

                    entryTotal += pageLength;
                }
            }

            System.Console.WriteLine(writer.Stats);
        }

        // Measure the file only after the writer has been disposed.
        outputBytes = new FileInfo(outPath).Length;
    }
}