예제 #1
0
        /// <summary>
        ///  Prints a histogram of word lengths for the dictionary at
        ///  <paramref name="dictionaryPath"/>, followed by the total length,
        ///  the word count, and the average length per word.
        /// </summary>
        /// <param name="dictionaryPath">Path passed to WordCompressor.OpenRead.</param>
        public static void DictionaryLengths(string dictionaryPath)
        {
            // Every length above this value is counted in the last bucket.
            const int MaxTrackedLength = 128;

            long total     = 0;
            int  wordCount = 0;

            int[] countForLength = new int[MaxTrackedLength + 1];

            using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
            {
                wordCount = compressor.WordCount;
                for (int wordIndex = 0; wordIndex < compressor.WordCount; ++wordIndex)
                {
                    int length = compressor[wordIndex].Word.Length;

                    // The total uses the real length; only the histogram bucket is clamped.
                    total += length;

                    if (length > MaxTrackedLength)
                    {
                        length = MaxTrackedLength;
                    }
                    countForLength[length]++;
                }
            }

            // Only lengths which actually occurred are reported.
            for (int length = 0; length < countForLength.Length; ++length)
            {
                int count = countForLength[length];
                if (count > 0)
                {
                    System.Console.WriteLine($"{length} => {count:n0}");
                }
            }

            // An empty dictionary would otherwise compute 0 / 0 and print "NaN".
            float average = (wordCount == 0 ? 0f : (float)total / (float)wordCount);
            System.Console.WriteLine($"Total {total:n0}b for {wordCount:n0} words. Avg:{average:n2} bytes.");
        }
예제 #2
0
        /// <summary>
        ///  Writes one integer per dictionary word to <paramref name="blockPath"/>,
        ///  preceded by a single zero. Each integer is either the running total of
        ///  word lengths or the length of the word itself.
        /// </summary>
        /// <param name="dictionaryPath">Path passed to WordCompressor.OpenRead.</param>
        /// <param name="blockPath">Path of the block file to write.</param>
        /// <param name="absolute">True to write running totals; false to write individual lengths.</param>
        public static void TranslateDictionaryPositions(string dictionaryPath, string blockPath, bool absolute)
        {
            // These three are captured by the summary lambda below, so they must outlive the inner scopes.
            long fileBytes    = 0;
            int  runningTotal = 0;
            int  words        = 0;

            using (new ConsoleWatch($"Translating {dictionaryPath} positions to {blockPath}...",
                                    () => $"{words:n0} words, total length {runningTotal:n0}, written to {fileBytes:n0} bytes ({((float)(8 * fileBytes) / (float)(words)):n2} bits per position)"))
            {
                using (IntBlockWriter writer = new IntBlockWriter(new BufferedWriter(blockPath)))
                using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
                {
                    words = compressor.WordCount;

                    // A zero is always written ahead of the per-word values.
                    writer.Write(0);

                    int index = 0;
                    while (index < compressor.WordCount)
                    {
                        int wordLength = compressor[index].Word.Length;
                        runningTotal += wordLength;

                        if (absolute)
                        {
                            writer.Write(runningTotal);
                        }
                        else
                        {
                            writer.Write(wordLength);
                        }

                        index++;
                    }

                    System.Console.WriteLine(writer.Stats);
                }

                // Measured only after the writer is disposed, and before the watch prints its summary.
                fileBytes = new FileInfo(blockPath).Length;
            }
        }
예제 #3
0
        /// <summary>
        ///  Builds a searcher over the file at <paramref name="bionFilePath"/>, opening
        ///  the sibling files which share its name but use the ".wdx", ".cdx", and ".idx"
        ///  extensions.
        /// </summary>
        /// <param name="bionFilePath">Path of the file opened for the BionReader; the sibling paths are derived from it.</param>
        /// <param name="runDepth">Value stored in _runDepth.</param>
        public BionSearcher(string bionFilePath, int runDepth)
        {
            // Each component is created inside a Memory.Log call with a label.
            // NOTE(review): presumably Memory.Log reports the memory used by the factory - confirm against Memory.Log.
            _compressor        = Memory.Log("Dictionary", () => WordCompressor.OpenRead(Path.ChangeExtension(bionFilePath, ".wdx")));
            _containerIndex    = Memory.Log("ContainerIndex", () => ContainerIndex.OpenRead(Path.ChangeExtension(bionFilePath, ".cdx")));
            _searchIndexReader = Memory.Log("SearchIndex", () => new SearchIndexReader(Path.ChangeExtension(bionFilePath, ".idx")));

            // Must come after _containerIndex and _compressor are assigned, since both are passed to the reader.
            _bionReader        = Memory.Log("BionReader", () => new BionReader(File.OpenRead(bionFilePath), containerIndex: _containerIndex, compressor: _compressor));

            _runDepth      = runDepth;

            // Buffer with room for 256 positions.
            _termPositions = new long[256];
        }
예제 #4
0
        /// <summary>
        ///  Prints the dictionary words whose length equals <paramref name="length"/>,
        ///  in dictionary order, stopping once 100 words have been written.
        /// </summary>
        /// <param name="dictionaryPath">Path passed to WordCompressor.OpenRead.</param>
        /// <param name="length">Exact word length to match.</param>
        public static void WriteWordsForLength(string dictionaryPath, int length)
        {
            using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
            {
                // Counts down from the output limit; reaching zero ends the scan.
                int remaining = 100;

                for (int i = 0; i < compressor.WordCount; i++)
                {
                    String8 candidate = compressor[i].Word;
                    if (candidate.Length != length)
                    {
                        continue;
                    }

                    System.Console.WriteLine(candidate);

                    remaining--;
                    if (remaining == 0)
                    {
                        return;
                    }
                }
            }
        }
예제 #5
0
        /// <summary>
        ///  Converts the sample JSON file, then for every entry in the container index
        ///  seeks to its start offset, checks that a container start token is read there,
        ///  reads until the depth returns to its starting value, and checks that the
        ///  matching end token is reached at the recorded end offset.
        /// </summary>
        public void ContainerIndex_EndToEnd()
        {
            string jsonFilePath   = @"Content\Medium.json";
            string bionFilePath   = Path.ChangeExtension(jsonFilePath, ".bion");
            string dictionaryPath = Path.ChangeExtension(bionFilePath, "dict.bion");

            JsonBionConverter.JsonToBion(jsonFilePath, bionFilePath, dictionaryPath);

            using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
            using (ContainerIndex cIndex = ContainerIndex.OpenRead(Path.ChangeExtension(bionFilePath, ".cdx")))
            using (BionReader reader = new BionReader(File.OpenRead(bionFilePath), cIndex, compressor))
            {
                for (int i = 0; i < cIndex.Count; ++i)
                {
                    ContainerEntry container = cIndex[i];

                    // Seek to container start
                    reader.Seek(container.StartByteOffset);

                    // Verify a container start is there
                    int depth = reader.Depth;
                    reader.Read();

                    bool isObject = (reader.TokenType == BionToken.StartObject);
                    Assert.AreEqual((isObject ? BionToken.StartObject : BionToken.StartArray), reader.TokenType);

                    // Read until the depth is back to the same value.
                    // Fail if the reader runs out of tokens first; otherwise a bad
                    // index entry would leave this loop spinning forever.
                    while (reader.Depth != depth)
                    {
                        Assert.IsTrue(reader.Read(), "Reader ran out of tokens before container " + i + " was closed.");
                    }

                    // Verify this is the end container position
                    Assert.AreEqual((isObject ? BionToken.EndObject : BionToken.EndArray), reader.TokenType);
                    Assert.AreEqual(container.EndByteOffset, reader.BytesRead);
                }
            }
        }
예제 #6
0
 /// <summary>
 ///  Converts the file at <paramref name="bionPath"/> to indented JSON written to
 ///  <paramref name="jsonPath"/>.
 /// </summary>
 /// <param name="bionPath">Path of the file to read.</param>
 /// <param name="jsonPath">Path of the JSON file to write.</param>
 /// <param name="fromDictionaryPath">Optional dictionary path; when null or empty, no compressor is passed to the reader.</param>
 public static void BionToJson(string bionPath, string jsonPath, string fromDictionaryPath = null)
 {
     bool hasDictionary = !String.IsNullOrEmpty(fromDictionaryPath);

     // A null compressor is a valid 'using' resource; it is simply not disposed.
     using (WordCompressor compressor = (hasDictionary ? WordCompressor.OpenRead(fromDictionaryPath) : null))
     using (BionReader reader = new BionReader(File.OpenRead(bionPath), compressor: compressor))
     using (JsonTextWriter writer = new JsonTextWriter(new StreamWriter(jsonPath)))
     {
         writer.Formatting = Formatting.Indented;

         // The reader/writer overload does the actual conversion.
         BionToJson(reader, writer);
     }
 }
예제 #7
0
        /// <summary>
        ///  Opens <paramref name="filePath"/> and skips over its content inside a
        ///  ConsoleWatch scope, using a BionReader for ".bion" files and a
        ///  JsonTextReader for everything else.
        /// </summary>
        /// <param name="filePath">File to read; the extension selects the reader.</param>
        /// <param name="fromDictionaryPath">Dictionary path for the BionReader's compressor; null means no compressor.</param>
        private static void Skip(string filePath, string fromDictionaryPath)
        {
            VerifyFileExists(filePath);
            VerifyFileExists(fromDictionaryPath);

            using (new ConsoleWatch($"Reading [Skip All] {filePath} ({FileLength.MB(filePath)})..."))
            {
                bool isBion = filePath.EndsWith(".bion", StringComparison.OrdinalIgnoreCase);

                if (!isBion)
                {
                    using (JsonTextReader json = new JsonTextReader(new StreamReader(filePath)))
                    {
                        // Read the first token, then skip from there.
                        json.Read();
                        json.Skip();
                    }

                    return;
                }

                using (WordCompressor compressor = (fromDictionaryPath != null ? WordCompressor.OpenRead(fromDictionaryPath) : null))
                using (BionReader bion = new BionReader(File.OpenRead(filePath), compressor: compressor))
                {
                    bion.Skip();
                }
            }
        }
예제 #8
0
        /// <summary>
        ///  Reads every token of <paramref name="filePath"/> inside a ConsoleWatch scope
        ///  and reports how many were found, using a BionReader for ".bion" files and a
        ///  JsonTextReader for everything else.
        /// </summary>
        /// <param name="filePath">File to read; the extension selects the reader.</param>
        /// <param name="fromDictionaryPath">Dictionary path for the BionReader's compressor; null means no compressor.</param>
        private static void Count(string filePath, string fromDictionaryPath)
        {
            VerifyFileExists(filePath);
            VerifyFileExists(fromDictionaryPath);

            // Captured by the completion message lambda below.
            long tokens = 0;

            using (new ConsoleWatch($"Reading [Count] {filePath} ({FileLength.MB(filePath)})...",
                                    () => $"Done; {tokens:n0} tokens found in file"))
            {
                bool isJson = !filePath.EndsWith(".bion", StringComparison.OrdinalIgnoreCase);

                if (isJson)
                {
                    using (JsonTextReader json = new JsonTextReader(new StreamReader(filePath)))
                    {
                        while (json.Read())
                        {
                            tokens += 1;
                        }
                    }
                }
                else
                {
                    using (WordCompressor compressor = (fromDictionaryPath != null ? WordCompressor.OpenRead(fromDictionaryPath) : null))
                    using (BionReader bion = new BionReader(File.OpenRead(filePath), compressor: compressor))
                    {
                        while (bion.Read())
                        {
                            tokens += 1;
                        }
                    }
                }
            }
        }
예제 #9
0
        /// <summary>
        ///  Reads every token of <paramref name="filePath"/> inside a ConsoleWatch scope,
        ///  retrieving the value of each name, string, integer, and float token, and
        ///  reports how many tokens were found. A BionReader is used for ".bion" files
        ///  and a JsonTextReader for everything else.
        /// </summary>
        /// <param name="filePath">File to read; the extension selects the reader.</param>
        /// <param name="fromDictionaryPath">Dictionary path for the BionReader's compressor; null means no compressor.</param>
        private static void Read(string filePath, string fromDictionaryPath)
        {
            VerifyFileExists(filePath);
            VerifyFileExists(fromDictionaryPath);

            // Captured by the completion message lambda below.
            long tokens = 0;

            using (new ConsoleWatch($"Reading [Full] {filePath} ({FileLength.MB(filePath)})...",
                                    () => $"Done; {tokens:n0} tokens found in file"))
            {
                bool isBion = filePath.EndsWith(".bion", StringComparison.OrdinalIgnoreCase);

                if (isBion)
                {
                    using (WordCompressor compressor = (fromDictionaryPath != null ? WordCompressor.OpenRead(fromDictionaryPath) : null))
                    using (BionReader bion = new BionReader(File.OpenRead(filePath), compressor: compressor))
                    {
                        while (bion.Read())
                        {
                            tokens++;

                            // The values are fetched only so that the read work is done; they are not used.
                            BionToken kind = bion.TokenType;
                            if (kind == BionToken.PropertyName || kind == BionToken.String)
                            {
                                // The String8 accessor is used here rather than CurrentString().
                                String8 text = bion.CurrentString8();
                            }
                            else if (kind == BionToken.Integer)
                            {
                                long whole = bion.CurrentInteger();
                            }
                            else if (kind == BionToken.Float)
                            {
                                double real = bion.CurrentFloat();
                            }
                        }
                    }
                }
                else
                {
                    using (JsonTextReader json = new JsonTextReader(new StreamReader(filePath)))
                    {
                        while (json.Read())
                        {
                            tokens++;

                            // As above, the casts exist only to force the value to be materialized.
                            JsonToken kind = json.TokenType;
                            if (kind == JsonToken.PropertyName || kind == JsonToken.String)
                            {
                                string text = (string)json.Value;
                            }
                            else if (kind == JsonToken.Integer)
                            {
                                long whole = (long)json.Value;
                            }
                            else if (kind == JsonToken.Float)
                            {
                                double real = (double)json.Value;
                            }
                        }
                    }
                }
            }
        }