Пример #1
0
        /// <summary>
        ///  Writes <paramref name="values"/> to "IntBlock.bin" through an IntBlockWriter, asserts the
        ///  size of the resulting file, then reads the file back through an IntBlockReader and
        ///  asserts that every value comes back in the original order.
        /// </summary>
        /// <param name="values">Values to write and then read back.</param>
        /// <param name="expectedLength">Expected length of the written file, in bytes.</param>
        private static void RoundTrip(int[] values, long expectedLength)
        {
            const string path = "IntBlock.bin";

            // Write phase: the writer is disposed before the file length is inspected.
            using (IntBlockWriter output = new IntBlockWriter(new BufferedWriter(File.Create(path))))
            {
                foreach (int value in values)
                {
                    output.Write(value);
                }
            }

            long actualLength = new FileInfo(path).Length;
            Assert.AreEqual(expectedLength, actualLength);

            // Read phase: pull blocks until one comes back with fewer than BlockSize values.
            using (IntBlockReader input = new IntBlockReader(new BufferedReader(File.OpenRead(path))))
            {
                int[] chunk;
                int   verified = 0;

                while (true)
                {
                    int read = input.Next(out chunk);

                    for (int offset = 0; offset < read; ++offset)
                    {
                        Assert.AreEqual(values[verified++], chunk[offset]);
                    }

                    if (read != IntBlock.BlockSize)
                    {
                        break;
                    }
                }

                // Every written value must have been seen exactly once.
                Assert.AreEqual(values.Length, verified);
            }
        }
Пример #2
0
        /// <summary>
        ///  Writes <paramref name="count"/> synthetic values (the repeating sequence 0..15) to
        ///  <paramref name="blockPath"/> through an IntBlockWriter, prints the writer's Stats, and
        ///  records the size of the output file for the ConsoleWatch completion message.
        /// </summary>
        /// <param name="blockPath">Path of the file to write.</param>
        /// <param name="count">Number of values to generate and write.</param>
        public static void WriteSyntheticBlock(string blockPath, long count)
        {
            int index = 0;

            int[] block = new int[IntBlock.BlockSize];

            long bytesWritten = 0;

            using (new ConsoleWatch($"Writing Synthetic {count:n0} ints to {blockPath}...",
                                    () => $"{count:n0} values, written as {bytesWritten:n0} bytes ({((float)(8 * bytesWritten) / (float)(count)):n2} bits per value)."))
            {
                using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(blockPath)))
                {
                    for (long i = 0; i < count; ++i)
                    {
                        // Stage values locally and hand them to the writer one full block at a time.
                        block[index++] = (int)(i & 15);
                        if (index == IntBlock.BlockSize)
                        {
                            writer.Write(block, 0, index);
                            index = 0;
                        }
                    }

                    // Flush the staged tail; without this, the last (count % BlockSize) values
                    // would never reach the writer when count is not a multiple of the block size.
                    // NOTE(review): uses the single-value Write for the tail on the assumption that
                    // it accepts an arbitrary number of values - confirm against IntBlockWriter.
                    for (int j = 0; j < index; ++j)
                    {
                        writer.Write(block[j]);
                    }

                    System.Console.WriteLine(writer.Stats);
                }

                // Measured after the writer is disposed; the completion callback reads this value.
                bytesWritten = new FileInfo(blockPath).Length;
            }
        }
Пример #3
0
        /// <summary>
        ///  Reads the words of the dictionary at <paramref name="dictionaryPath"/> and writes one
        ///  integer per word to <paramref name="blockPath"/> through an IntBlockWriter, preceded by
        ///  a single 0. The writer's Stats are printed before the writer is disposed.
        /// </summary>
        /// <param name="dictionaryPath">Path opened with WordCompressor.OpenRead.</param>
        /// <param name="blockPath">Path of the block file to write.</param>
        /// <param name="absolute">
        ///  True to write the running total of word lengths after each word;
        ///  false to write each word's own length.
        /// </param>
        public static void TranslateDictionaryPositions(string dictionaryPath, string blockPath, bool absolute)
        {
            long outputBytes   = 0;
            int  runningLength = 0;
            int  words         = 0;

            using (new ConsoleWatch($"Translating {dictionaryPath} positions to {blockPath}...",
                                    () => $"{words:n0} words, total length {runningLength:n0}, written to {outputBytes:n0} bytes ({((float)(8 * outputBytes) / (float)(words)):n2} bits per position)"))
            {
                using (IntBlockWriter output = new IntBlockWriter(new BufferedWriter(blockPath)))
                {
                    using (WordCompressor dictionary = WordCompressor.OpenRead(dictionaryPath))
                    {
                        words = dictionary.WordCount;

                        // The output always starts with a zero entry.
                        output.Write(0);

                        for (int w = 0; w < dictionary.WordCount; w++)
                        {
                            int wordLength = dictionary[w].Word.Length;
                            runningLength += wordLength;

                            if (absolute)
                            {
                                output.Write(runningLength);
                            }
                            else
                            {
                                output.Write(wordLength);
                            }
                        }

                        System.Console.WriteLine(output.Stats);
                    }
                }

                // Read the file size only after the writer has been disposed.
                outputBytes = new FileInfo(blockPath).Length;
            }
        }
Пример #4
0
        /// <summary>
        ///  Reads blocks from <paramref name="filePath"/> with
        ///  NumberConverter.ReadSixBitTerminatedBlock and writes every decoded value, cast to int,
        ///  to <paramref name="outPath"/> through an IntBlockWriter. A block that decodes fewer than
        ///  BlockSize values is padded with zeros, so BlockSize values are written per block read.
        /// </summary>
        /// <param name="filePath">Path of the input file.</param>
        /// <param name="outPath">Path of the block file to write.</param>
        public static void TranslateSixBit(string filePath, string outPath)
        {
            int     readBufferBytes = 65536;
            long    bytesConsumed   = 0;
            byte[]  readBuffer      = new byte[readBufferBytes];
            ulong[] values          = new ulong[BlockSize];

            using (new ConsoleWatch($"Translate(\"{filePath}\", {readBufferBytes})", () => $"Done; {bytesConsumed:n0} bytes"))
            {
                using (IntBlockWriter output = new IntBlockWriter(new BufferedWriter(outPath)))
                {
                    using (BufferedReader input = new BufferedReader(File.OpenRead(filePath), readBuffer))
                    {
                        while (!input.EndOfStream)
                        {
                            int decodedCount = NumberConverter.ReadSixBitTerminatedBlock(input, values);

                            // Zero out whatever is left over past the decoded values.
                            if (decodedCount < BlockSize)
                            {
                                Array.Fill(values, 0UL, decodedCount, BlockSize - decodedCount);
                            }

                            // The array holds exactly BlockSize entries, so this writes a full block.
                            foreach (ulong value in values)
                            {
                                output.Write((int)value);
                            }
                        }

                        bytesConsumed = input.BytesRead;
                    }
                }
            }
        }
Пример #5
0
        /// <summary>
        ///  Walks every word index of the search index at <paramref name="filePath"/>, pages through
        ///  the result returned by Find for each one, and writes each entry (cast to int) to
        ///  <paramref name="outPath"/> through an IntBlockWriter. The writer's Stats are printed
        ///  before the writer is disposed.
        /// </summary>
        /// <param name="filePath">Path passed to SearchIndexReader.</param>
        /// <param name="outPath">Path of the block file to write.</param>
        /// <param name="absolute">
        ///  True to write each entry as-is; false to write the difference from the previous entry
        ///  of the same word (the first entry of each word is relative to 0).
        /// </param>
        public static void TranslateSearchIndex(string filePath, string outPath, bool absolute)
        {
            long   outputBytes  = 0;
            long   entryCount   = 0;
            long[] page         = new long[BlockSize];

            using (new ConsoleWatch($"Translating \"{filePath}\" to block \"{outPath}\"...",
                                    () => $"{entryCount:n0} entries, written to {outputBytes:n0} bytes ({((float)(8 * outputBytes) / (float)(entryCount)):n2} bits per position)"))
            {
                using (IntBlockWriter output = new IntBlockWriter(new BufferedWriter(outPath)))
                {
                    using (SearchIndexReader index = new SearchIndexReader(filePath))
                    {
                        for (int wordIndex = 0; wordIndex < index.WordCount; ++wordIndex)
                        {
                            SearchResult matches  = index.Find(wordIndex);
                            int          previous = 0;   // restarts at 0 for every word

                            while (!matches.Done)
                            {
                                int pageLength = matches.Page(ref page);

                                for (int k = 0; k < pageLength; ++k)
                                {
                                    int position = (int)page[k];

                                    if (absolute)
                                    {
                                        output.Write(position);
                                    }
                                    else
                                    {
                                        output.Write(position - previous);
                                    }

                                    previous = position;
                                }

                                entryCount += pageLength;
                            }
                        }

                        System.Console.WriteLine(output.Stats);
                    }
                }

                // Read the file size only after the writer has been disposed.
                outputBytes = new FileInfo(outPath).Length;
            }
        }