Ejemplo n.º 1
0
        /// <summary>
        ///  Opens a BION file plus its companion files so it can be searched.
        ///  Companion paths are derived from <paramref name="bionFilePath"/> by replacing the
        ///  extension: ".wdx" (opened via WordCompressor), ".cdx" (opened via ContainerIndex)
        ///  and ".idx" (opened via SearchIndexReader).
        /// </summary>
        /// <param name="bionFilePath">Path of the BION file to open for reading.</param>
        /// <param name="runDepth">Value stored in <c>_runDepth</c> for later use by this instance.</param>
        public BionSearcher(string bionFilePath, int runDepth)
        {
            try
            {
                // Each open is routed through Memory.Log under a descriptive label.
                // The BionReader must be created last: it is handed the container index and compressor.
                _compressor        = Memory.Log("Dictionary", () => WordCompressor.OpenRead(Path.ChangeExtension(bionFilePath, ".wdx")));
                _containerIndex    = Memory.Log("ContainerIndex", () => ContainerIndex.OpenRead(Path.ChangeExtension(bionFilePath, ".cdx")));
                _searchIndexReader = Memory.Log("SearchIndex", () => new SearchIndexReader(Path.ChangeExtension(bionFilePath, ".idx")));
                _bionReader        = Memory.Log("BionReader", () => new BionReader(File.OpenRead(bionFilePath), containerIndex: _containerIndex, compressor: _compressor));

                _runDepth      = runDepth;
                _termPositions = new long[256];
            }
            catch
            {
                // When construction fails the caller never receives an instance to dispose,
                // so anything opened before the failure must be released here or it stays
                // open until finalization. Release in reverse order of acquisition.
                _bionReader?.Dispose();
                _searchIndexReader?.Dispose();
                _containerIndex?.Dispose();
                _compressor?.Dispose();

                throw;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        ///  Disposes every component that is still set and clears its field afterwards,
        ///  so calling this method again finds nothing left to release.
        ///  Order: compressor, search index reader, container index, BION reader.
        /// </summary>
        public void Dispose()
        {
            if (_compressor != null)
            {
                _compressor.Dispose();
                _compressor = null;
            }

            if (_searchIndexReader != null)
            {
                _searchIndexReader.Dispose();
                _searchIndexReader = null;
            }

            if (_containerIndex != null)
            {
                _containerIndex.Dispose();
                _containerIndex = null;
            }

            if (_bionReader != null)
            {
                _bionReader.Dispose();
                _bionReader = null;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        ///  Round-trip check for the search index: writes 10 occurrences of each of 100 words
        ///  at positions spaced 16 apart, then reads every word back and verifies both the
        ///  number of matches and each match position.
        /// </summary>
        /// <param name="requireMerge">
        ///  Selects the buffer size given to the writer: 500 when true, 1000 when false.
        ///  NOTE(review): presumably the smaller buffer forces the writer to merge; confirm against SearchIndexWriter.
        /// </param>
        public void SearchIndex_RoundTrip(bool requireMerge)
        {
            int wordCount   = 100;
            int occurrences = 10;
            int increment   = 16;

            int indexBufferSize;
            if (requireMerge)
            {
                indexBufferSize = 500;
            }
            else
            {
                indexBufferSize = 1000;
            }

            string path = "SearchIndex.idx";

            // Write phase: entry k belongs to word (k % wordCount) and sits at position (k * increment),
            // i.e. all words once in order, repeated 'occurrences' times, with steadily growing positions.
            using (SearchIndexWriter writer = new SearchIndexWriter(path, 100, indexBufferSize))
            {
                int totalEntries = occurrences * wordCount;
                for (int entry = 0; entry < totalEntries; entry++)
                {
                    writer.Add(entry % wordCount, entry * increment);
                }
            }

            // Read phase: every word must come back with exactly the positions written above.
            using (SearchIndexReader reader = new SearchIndexReader(path))
            {
                long[] found = new long[occurrences];

                for (int word = 0; word < wordCount; word++)
                {
                    // Drain all result pages for this word, counting the matches returned
                    SearchResult matches = reader.Find(word);
                    int total = 0;
                    while (!matches.Done)
                    {
                        total += matches.Page(ref found);
                    }

                    // There should be 10 for every word
                    Assert.AreEqual(occurrences, total);

                    // The n-th match of a word is the entry written during the n-th pass over all words
                    for (int pass = 0; pass < occurrences; pass++)
                    {
                        int  entry    = word + (pass * wordCount);
                        long expected = increment * entry;
                        Assert.AreEqual(expected, found[pass]);
                    }
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        ///  Reads the matches of every word in the search index at <paramref name="filePath"/>
        ///  and writes them through an IntBlockWriter to <paramref name="outPath"/>.
        ///  The writer's Stats are printed to the console, and a ConsoleWatch is given a start
        ///  message plus a callback that summarizes entry count, output size and bits per position.
        /// </summary>
        /// <param name="filePath">Path of the search index to read.</param>
        /// <param name="outPath">Path of the block file to write.</param>
        /// <param name="absolute">
        ///  When true each position is written unchanged; when false the difference from the
        ///  previous position of the same word is written (the first one relative to zero).
        /// </param>
        public static void TranslateSearchIndex(string filePath, string outPath, bool absolute)
        {
            // intCount and bytesWritten are captured by the summary callback below,
            // so they must be updated in place rather than copied.
            long   intCount     = 0;
            long   bytesWritten = 0;
            long[] pageBuffer   = new long[BlockSize];

            using (new ConsoleWatch($"Translating \"{filePath}\" to block \"{outPath}\"...",
                                    () => $"{intCount:n0} entries, written to {bytesWritten:n0} bytes ({((float)(8 * bytesWritten) / (float)(intCount)):n2} bits per position)"))
            {
                using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(outPath)))
                {
                    using (SearchIndexReader reader = new SearchIndexReader(filePath))
                    {
                        for (int wordIndex = 0; wordIndex < reader.WordCount; wordIndex++)
                        {
                            // Delta encoding restarts from zero for every word
                            int previous = 0;

                            SearchResult matches = reader.Find(wordIndex);
                            while (!matches.Done)
                            {
                                int pageCount = matches.Page(ref pageBuffer);

                                for (int slot = 0; slot < pageCount; slot++)
                                {
                                    // NOTE(review): positions are narrowed from long to int here; confirm
                                    // that indexed positions never exceed int.MaxValue.
                                    int position = (int)pageBuffer[slot];

                                    int value;
                                    if (absolute)
                                    {
                                        value = position;
                                    }
                                    else
                                    {
                                        value = position - previous;
                                    }

                                    writer.Write(value);
                                    previous = position;
                                }

                                intCount += pageCount;
                            }
                        }

                        System.Console.WriteLine(writer.Stats);
                    }
                }

                // Measure the output only after the writer has been disposed above
                bytesWritten = new FileInfo(outPath).Length;
            }
        }