/// <summary>
///  Writes <paramref name="values"/> to "IntBlock.bin" one at a time, checks the
///  resulting file length, then reads the file back and checks that every value
///  comes back in the original order.
/// </summary>
/// <param name="values">Values to write and then read back.</param>
/// <param name="expectedLength">Expected length in bytes of the written file.</param>
private static void RoundTrip(int[] values, long expectedLength)
{
    string fileName = "IntBlock.bin";

    // The writer is disposed before the file length is inspected.
    using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(File.Create(fileName))))
    {
        foreach (int value in values)
        {
            writer.Write(value);
        }
    }

    Assert.AreEqual(expectedLength, new FileInfo(fileName).Length);

    using (IntBlockReader reader = new IntBlockReader(new BufferedReader(File.OpenRead(fileName))))
    {
        int verified = 0;

        while (true)
        {
            int[] page;
            int pageLength = reader.Next(out page);

            for (int offset = 0; offset < pageLength; ++offset)
            {
                Assert.AreEqual(values[verified], page[offset]);
                verified++;
            }

            // Anything other than a full block ends the read loop.
            if (pageLength != IntBlock.BlockSize)
            {
                break;
            }
        }

        // Every written value must have been seen exactly once.
        Assert.AreEqual(values.Length, verified);
    }
}
/// <summary>
///  Writes <paramref name="count"/> synthetic values (the value for position i is
///  <c>i &amp; 15</c>) to <paramref name="blockPath"/> via an IntBlockWriter, then
///  prints the writer's Stats. The ConsoleWatch completion message reports the
///  resulting file size and the bits used per value.
/// </summary>
/// <param name="blockPath">Path of the file to write.</param>
/// <param name="count">Number of synthetic values to write.</param>
public static void WriteSyntheticBlock(string blockPath, long count)
{
    int index = 0;
    int[] block = new int[IntBlock.BlockSize];
    long bytesWritten = 0;

    using (new ConsoleWatch($"Writing Synthetic {count:n0} ints to {blockPath}...", () => $"{count:n0} values, written as {bytesWritten:n0} bytes ({((float)(8 * bytesWritten) / (float)(count)):n2} bits per value)."))
    {
        using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(blockPath)))
        {
            for (long i = 0; i < count; ++i)
            {
                // Stage values in a local array and hand them to the writer one full block at a time.
                block[index++] = (int)(i & 15);

                if (index == IntBlock.BlockSize)
                {
                    writer.Write(block, 0, index);
                    index = 0;
                }
            }

            // Write the trailing partial block; without this, the last
            // (count % BlockSize) staged values would never reach the writer.
            if (index > 0)
            {
                writer.Write(block, 0, index);
            }

            System.Console.WriteLine(writer.Stats);
        }

        // Measured after the writer is disposed; the completion message callback reads this value.
        bytesWritten = new FileInfo(blockPath).Length;
    }
}
/// <summary>
///  Reads the words in the dictionary at <paramref name="dictionaryPath"/> and writes
///  one integer per word to <paramref name="blockPath"/>, preceded by a single zero.
///  Prints the writer's Stats when done.
/// </summary>
/// <param name="dictionaryPath">Path of the dictionary opened with WordCompressor.OpenRead.</param>
/// <param name="blockPath">Path of the block file to write.</param>
/// <param name="absolute">
///  True to write the running total of word lengths after each word;
///  false to write each word's own length.
/// </param>
public static void TranslateDictionaryPositions(string dictionaryPath, string blockPath, bool absolute)
{
    int wordCount = 0;
    int totalLength = 0;
    long bytesWritten = 0;

    using (new ConsoleWatch($"Translating {dictionaryPath} positions to {blockPath}...", () => $"{wordCount:n0} words, total length {totalLength:n0}, written to {bytesWritten:n0} bytes ({((float)(8 * bytesWritten) / (float)(wordCount)):n2} bits per position)"))
    {
        using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(blockPath)))
        using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
        {
            wordCount = compressor.WordCount;

            // A leading zero is always written before the per-word values.
            writer.Write(0);

            int wordIndex = 0;
            while (wordIndex < compressor.WordCount)
            {
                int wordLength = compressor[wordIndex].Word.Length;
                totalLength += wordLength;

                if (absolute)
                {
                    writer.Write(totalLength);
                }
                else
                {
                    writer.Write(wordLength);
                }

                ++wordIndex;
            }

            System.Console.WriteLine(writer.Stats);
        }

        // Measured after the writer is disposed; the completion message callback reads this value.
        bytesWritten = new FileInfo(blockPath).Length;
    }
}
/// <summary>
///  Reads <paramref name="filePath"/> block by block with
///  NumberConverter.ReadSixBitTerminatedBlock and writes every decoded value, cast
///  to int, to <paramref name="outPath"/>. A block that decodes fewer than BlockSize
///  values is zero-filled so that BlockSize values are always written per block.
/// </summary>
/// <param name="filePath">Path of the input file to read.</param>
/// <param name="outPath">Path of the block file to write.</param>
public static void TranslateSixBit(string filePath, string outPath)
{
    int bufferSizeBytes = 65536;
    ulong[] decoded = new ulong[BlockSize];
    byte[] buffer = new byte[bufferSizeBytes];
    long totalSize = 0;

    using (new ConsoleWatch($"Translate(\"{filePath}\", {bufferSizeBytes})", () => $"Done; {totalSize:n0} bytes"))
    {
        using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(outPath)))
        using (BufferedReader reader = new BufferedReader(File.OpenRead(filePath), buffer))
        {
            while (!reader.EndOfStream)
            {
                int decodedCount = NumberConverter.ReadSixBitTerminatedBlock(reader, decoded);

                // Zero the unused tail so values left over from a previous block are not written again.
                int padding = BlockSize - decodedCount;
                if (padding > 0)
                {
                    Array.Fill<ulong>(decoded, 0, decodedCount, padding);
                }

                // 'decoded' holds exactly BlockSize entries, so this writes one full block.
                foreach (ulong value in decoded)
                {
                    writer.Write((int)value);
                }
            }

            // Captured for the completion message while the reader is still open.
            totalSize = reader.BytesRead;
        }
    }
}
/// <summary>
///  Reads the entries for every word in the search index at
///  <paramref name="filePath"/> and writes them, cast to int, to
///  <paramref name="outPath"/>. Prints the writer's Stats when done.
/// </summary>
/// <param name="filePath">Path of the search index to read.</param>
/// <param name="outPath">Path of the block file to write.</param>
/// <param name="absolute">
///  True to write each entry as read; false to write the difference from the
///  previous entry of the same word (the first entry of each word is relative to zero).
/// </param>
public static void TranslateSearchIndex(string filePath, string outPath, bool absolute)
{
    long[] decoded = new long[BlockSize];
    long intCount = 0;
    long bytesWritten = 0;

    using (new ConsoleWatch($"Translating \"{filePath}\" to block \"{outPath}\"...", () => $"{intCount:n0} entries, written to {bytesWritten:n0} bytes ({((float)(8 * bytesWritten) / (float)(intCount)):n2} bits per position)"))
    {
        using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(outPath)))
        using (SearchIndexReader reader = new SearchIndexReader(filePath))
        {
            for (int wordIndex = 0; wordIndex < reader.WordCount; ++wordIndex)
            {
                // The delta baseline restarts at zero for each word.
                int previous = 0;
                SearchResult result = reader.Find(wordIndex);

                while (!result.Done)
                {
                    int pageLength = result.Page(ref decoded);

                    for (int offset = 0; offset < pageLength; ++offset)
                    {
                        int current = (int)decoded[offset];
                        int valueToWrite = absolute ? current : current - previous;

                        writer.Write(valueToWrite);
                        previous = current;
                    }

                    intCount += pageLength;
                }
            }

            System.Console.WriteLine(writer.Stats);
        }

        // Measured after the writer is disposed; the completion message callback reads this value.
        bytesWritten = new FileInfo(outPath).Length;
    }
}