/// <summary>
///  Writes a histogram of word lengths for the dictionary at <paramref name="dictionaryPath"/>
///  to the console, followed by a summary line with the total length, the word count,
///  and the average length per word.
/// </summary>
/// <param name="dictionaryPath">Path passed to WordCompressor.OpenRead.</param>
public static void DictionaryLengths(string dictionaryPath)
{
    // Every word longer than this is tallied in the final bucket.
    const int LongestBucket = 128;

    long lengthSum = 0;
    int words = 0;
    int[] histogram = new int[LongestBucket + 1];

    using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
    {
        words = compressor.WordCount;

        for (int i = 0; i < compressor.WordCount; ++i)
        {
            int wordLength = compressor[i].Word.Length;
            lengthSum += wordLength;

            // The total above uses the real length; only the histogram slot is clamped.
            int slot = (wordLength > LongestBucket ? LongestBucket : wordLength);
            histogram[slot]++;
        }
    }

    // Report only the lengths which actually occurred.
    for (int slot = 0; slot < histogram.Length; ++slot)
    {
        int occurrences = histogram[slot];
        if (occurrences <= 0) { continue; }

        System.Console.WriteLine($"{slot} => {occurrences:n0}");
    }

    System.Console.WriteLine($"Total {lengthSum:n0}b for {words:n0} words. Avg:{((float)lengthSum / (float)words):n2} bytes.");
}
/// <summary>
///  Reads every word length from the dictionary at <paramref name="dictionaryPath"/> and writes
///  one integer per word to an IntBlockWriter over <paramref name="blockPath"/>, preceded by a
///  leading zero. Progress and a final summary are reported through ConsoleWatch.
/// </summary>
/// <param name="dictionaryPath">Path passed to WordCompressor.OpenRead.</param>
/// <param name="blockPath">Path of the output file to write.</param>
/// <param name="absolute">
///  True to write the running total of word lengths after each word;
///  false to write each word's own length.
/// </param>
public static void TranslateDictionaryPositions(string dictionaryPath, string blockPath, bool absolute)
{
    // These are captured by the summary callback below, so they must be declared before it.
    int words = 0;
    int runningLength = 0; // NOTE(review): int, while DictionaryLengths sums into a long - confirm dictionaries stay under int.MaxValue
    long outputBytes = 0;

    using (new ConsoleWatch(
        $"Translating {dictionaryPath} positions to {blockPath}...",
        () => $"{words:n0} words, total length {runningLength:n0}, written to {outputBytes:n0} bytes ({((float)(8 * outputBytes) / (float)(words)):n2} bits per position)"))
    {
        using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(blockPath)))
        using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
        {
            words = compressor.WordCount;

            // Leading zero precedes the per-word values.
            writer.Write(0);

            for (int i = 0; i < compressor.WordCount; ++i)
            {
                int wordLength = compressor[i].Word.Length;
                runningLength += wordLength;

                if (absolute)
                {
                    writer.Write(runningLength);
                }
                else
                {
                    writer.Write(wordLength);
                }
            }

            System.Console.WriteLine(writer.Stats);
        }

        // Measure the file only after the writer has been disposed.
        outputBytes = new FileInfo(blockPath).Length;
    }
}
/// <summary>
///  Opens the BION file at <paramref name="bionFilePath"/> together with its sibling
///  dictionary (.wdx), container index (.cdx) and search index (.idx) files.
///  Each open is wrapped in Memory.Log under a descriptive name.
/// </summary>
/// <param name="bionFilePath">Path of the BION file; sibling paths are derived by swapping its extension.</param>
/// <param name="runDepth">Stored in _runDepth.</param>
public BionSearcher(string bionFilePath, int runDepth)
{
    _compressor = Memory.Log("Dictionary", () =>
    {
        string dictionaryPath = Path.ChangeExtension(bionFilePath, ".wdx");
        return WordCompressor.OpenRead(dictionaryPath);
    });

    _containerIndex = Memory.Log("ContainerIndex", () =>
    {
        string containerIndexPath = Path.ChangeExtension(bionFilePath, ".cdx");
        return ContainerIndex.OpenRead(containerIndexPath);
    });

    _searchIndexReader = Memory.Log("SearchIndex", () =>
    {
        string searchIndexPath = Path.ChangeExtension(bionFilePath, ".idx");
        return new SearchIndexReader(searchIndexPath);
    });

    // The reader is created last because it is handed the container index and dictionary opened above.
    _bionReader = Memory.Log("BionReader", () =>
    {
        return new BionReader(File.OpenRead(bionFilePath), containerIndex: _containerIndex, compressor: _compressor);
    });

    _runDepth = runDepth;
    _termPositions = new long[256];
}
/// <summary>
///  Writes to the console the words in the dictionary at <paramref name="dictionaryPath"/>
///  whose length equals <paramref name="length"/>, stopping after the first 100 matches.
/// </summary>
/// <param name="dictionaryPath">Path passed to WordCompressor.OpenRead.</param>
/// <param name="length">Exact word length to match.</param>
public static void WriteWordsForLength(string dictionaryPath, int length)
{
    // Upper bound on the number of words written.
    const int MatchLimit = 100;

    int matchesWritten = 0;

    using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
    {
        // The limit is checked first so nothing more is read once it has been reached.
        for (int i = 0; matchesWritten < MatchLimit && i < compressor.WordCount; ++i)
        {
            String8 candidate = compressor[i].Word;
            if (candidate.Length != length) { continue; }

            System.Console.WriteLine(candidate);
            matchesWritten++;
        }
    }
}
// End-to-end check of the container index: converts Content\Medium.json to BION, then, for
// every entry in the generated .cdx index, seeks to the entry's start offset and verifies
// that a container starts there and that it ends exactly at the entry's recorded end offset.
public void ContainerIndex_EndToEnd()
{
    string jsonFilePath = @"Content\Medium.json";
    string bionFilePath = Path.ChangeExtension(jsonFilePath, ".bion");
    string dictionaryPath = Path.ChangeExtension(bionFilePath, "dict.bion");

    JsonBionConverter.JsonToBion(jsonFilePath, bionFilePath, dictionaryPath);

    using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
    using (ContainerIndex cIndex = ContainerIndex.OpenRead(Path.ChangeExtension(bionFilePath, ".cdx")))
    using (BionReader reader = new BionReader(File.OpenRead(bionFilePath), cIndex, compressor))
    {
        for (int i = 0; i < cIndex.Count; ++i)
        {
            ContainerEntry container = cIndex[i];

            // Seek to container start
            reader.Seek(container.StartByteOffset);

            // Verify a container start is there
            int depth = reader.Depth;
            Assert.IsTrue(reader.Read(), $"No token could be read at the start of container {i} (offset {container.StartByteOffset}).");

            bool isObject = (reader.TokenType == BionToken.StartObject);
            Assert.AreEqual((isObject ? BionToken.StartObject : BionToken.StartArray), reader.TokenType);

            // Read until the depth is back to the same value.
            // Each Read() is asserted so that running out of tokens fails the test instead of looping forever.
            while (reader.Depth != depth)
            {
                Assert.IsTrue(reader.Read(), $"Reader ran out of tokens before container {i} (start offset {container.StartByteOffset}) was closed.");
            }

            // Verify this is the end container position
            Assert.AreEqual((isObject ? BionToken.EndObject : BionToken.EndArray), reader.TokenType);
            Assert.AreEqual(container.EndByteOffset, reader.BytesRead);
        }
    }
}
/// <summary>
///  Converts the BION file at <paramref name="bionPath"/> into indented JSON written to
///  <paramref name="jsonPath"/>.
/// </summary>
/// <param name="bionPath">Path of the BION file to read.</param>
/// <param name="jsonPath">Path of the JSON file to write.</param>
/// <param name="fromDictionaryPath">
///  Optional dictionary path; when null or empty, the reader is created with a null compressor.
/// </param>
public static void BionToJson(string bionPath, string jsonPath, string fromDictionaryPath = null)
{
    bool hasDictionary = !String.IsNullOrEmpty(fromDictionaryPath);

    // A using statement accepts a null resource, so the no-dictionary case needs no special handling.
    using (WordCompressor compressor = (hasDictionary ? WordCompressor.OpenRead(fromDictionaryPath) : null))
    using (BionReader reader = new BionReader(File.OpenRead(bionPath), compressor: compressor))
    using (JsonTextWriter writer = new JsonTextWriter(new StreamWriter(jsonPath)) { Formatting = Formatting.Indented })
    {
        BionToJson(reader, writer);
    }
}
/// <summary>
///  Opens <paramref name="filePath"/> and skips over its content with the reader's Skip(),
///  inside a ConsoleWatch. Files ending in ".bion" (case-insensitive) are read with
///  BionReader; anything else is read with JsonTextReader.
/// </summary>
/// <param name="filePath">File to read.</param>
/// <param name="fromDictionaryPath">Dictionary passed to the BION reader; a null compressor is used when this is null.</param>
private static void Skip(string filePath, string fromDictionaryPath)
{
    VerifyFileExists(filePath);

    // NOTE(review): called even though the code below tolerates a null dictionary path -
    // confirm VerifyFileExists accepts null.
    VerifyFileExists(fromDictionaryPath);

    using (new ConsoleWatch($"Reading [Skip All] {filePath} ({FileLength.MB(filePath)})..."))
    {
        bool isBion = filePath.EndsWith(".bion", StringComparison.OrdinalIgnoreCase);

        if (!isBion)
        {
            using (JsonTextReader jsonReader = new JsonTextReader(new StreamReader(filePath)))
            {
                // Move onto the first token, then skip it.
                jsonReader.Read();
                jsonReader.Skip();
            }
        }
        else
        {
            using (WordCompressor compressor = (fromDictionaryPath == null ? null : WordCompressor.OpenRead(fromDictionaryPath)))
            using (BionReader bionReader = new BionReader(File.OpenRead(filePath), compressor: compressor))
            {
                bionReader.Skip();
            }
        }
    }
}
/// <summary>
///  Reads every token in <paramref name="filePath"/> and reports the number found through
///  ConsoleWatch. Files ending in ".bion" (case-insensitive) are read with BionReader;
///  anything else is read with JsonTextReader.
/// </summary>
/// <param name="filePath">File to read.</param>
/// <param name="fromDictionaryPath">Dictionary passed to the BION reader; a null compressor is used when this is null.</param>
private static void Count(string filePath, string fromDictionaryPath)
{
    VerifyFileExists(filePath);

    // NOTE(review): called even though the code below tolerates a null dictionary path -
    // confirm VerifyFileExists accepts null.
    VerifyFileExists(fromDictionaryPath);

    // Captured by the completion message below, so it is incremented in place.
    long tokensSeen = 0;

    using (new ConsoleWatch(
        $"Reading [Count] {filePath} ({FileLength.MB(filePath)})...",
        () => $"Done; {tokensSeen:n0} tokens found in file"))
    {
        bool isBion = filePath.EndsWith(".bion", StringComparison.OrdinalIgnoreCase);

        if (!isBion)
        {
            using (JsonTextReader jsonReader = new JsonTextReader(new StreamReader(filePath)))
            {
                while (jsonReader.Read())
                {
                    tokensSeen++;
                }
            }
        }
        else
        {
            using (WordCompressor compressor = (fromDictionaryPath == null ? null : WordCompressor.OpenRead(fromDictionaryPath)))
            using (BionReader bionReader = new BionReader(File.OpenRead(filePath), compressor: compressor))
            {
                while (bionReader.Read())
                {
                    tokensSeen++;
                }
            }
        }
    }
}
/// <summary>
///  Reads every token in <paramref name="filePath"/>, retrieving the value of each property
///  name, string, integer and float token, and reports the token count through ConsoleWatch.
///  Files ending in ".bion" (case-insensitive) are read with BionReader; anything else is
///  read with JsonTextReader.
/// </summary>
/// <param name="filePath">File to read.</param>
/// <param name="fromDictionaryPath">Dictionary passed to the BION reader; a null compressor is used when this is null.</param>
private static void Read(string filePath, string fromDictionaryPath)
{
    VerifyFileExists(filePath);

    // NOTE(review): called even though the code below tolerates a null dictionary path -
    // confirm VerifyFileExists accepts null.
    VerifyFileExists(fromDictionaryPath);

    // Captured by the completion message below, so it is incremented in place.
    long tokensSeen = 0;

    using (new ConsoleWatch(
        $"Reading [Full] {filePath} ({FileLength.MB(filePath)})...",
        () => $"Done; {tokensSeen:n0} tokens found in file"))
    {
        bool isBion = filePath.EndsWith(".bion", StringComparison.OrdinalIgnoreCase);

        if (!isBion)
        {
            using (JsonTextReader jsonReader = new JsonTextReader(new StreamReader(filePath)))
            {
                while (jsonReader.Read())
                {
                    tokensSeen++;

                    // The values are retrieved and then discarded; nothing else uses them.
                    switch (jsonReader.TokenType)
                    {
                        case JsonToken.PropertyName:
                        case JsonToken.String:
                            string jsonText = (string)jsonReader.Value;
                            break;

                        case JsonToken.Integer:
                            long jsonInteger = (long)jsonReader.Value;
                            break;

                        case JsonToken.Float:
                            double jsonFloat = (double)jsonReader.Value;
                            break;
                    }
                }
            }
        }
        else
        {
            using (WordCompressor compressor = (fromDictionaryPath == null ? null : WordCompressor.OpenRead(fromDictionaryPath)))
            using (BionReader bionReader = new BionReader(File.OpenRead(filePath), compressor: compressor))
            {
                while (bionReader.Read())
                {
                    tokensSeen++;

                    // The values are retrieved and then discarded; nothing else uses them.
                    switch (bionReader.TokenType)
                    {
                        case BionToken.PropertyName:
                        case BionToken.String:
                            // Retrieved as String8; the CurrentString() alternative was left disabled in the original.
                            String8 bionText = bionReader.CurrentString8();
                            break;

                        case BionToken.Integer:
                            long bionInteger = bionReader.CurrentInteger();
                            break;

                        case BionToken.Float:
                            double bionFloat = bionReader.CurrentFloat();
                            break;
                    }
                }
            }
        }
    }
}