Beispiel #1
0
        /// <summary>
        ///  Open <paramref name="filePath"/> and skip over its content inside a ConsoleWatch scope.
        ///  Paths ending in ".bion" (any casing) go through BionReader; everything else goes
        ///  through JsonTextReader.
        /// </summary>
        /// <param name="filePath">File to open.</param>
        /// <param name="fromDictionaryPath">Dictionary opened with WordCompressor.OpenRead for the BION case; when null, no compressor is passed to the reader.</param>
        private static void Skip(string filePath, string fromDictionaryPath)
        {
            VerifyFileExists(filePath);
            VerifyFileExists(fromDictionaryPath);

            string startMessage = $"Reading [Skip All] {filePath} ({FileLength.MB(filePath)})...";

            using (new ConsoleWatch(startMessage))
            {
                bool isBion = filePath.EndsWith(".bion", StringComparison.OrdinalIgnoreCase);

                if (!isBion)
                {
                    // JSON: read the first token, then skip from there
                    using (JsonTextReader json = new JsonTextReader(new StreamReader(filePath)))
                    {
                        json.Read();
                        json.Skip();
                    }
                }
                else
                {
                    // BION: the compressor stays null when no dictionary path was given
                    WordCompressor dictionary = null;
                    if (fromDictionaryPath != null)
                    {
                        dictionary = WordCompressor.OpenRead(fromDictionaryPath);
                    }

                    using (dictionary)
                    using (BionReader bion = new BionReader(File.OpenRead(filePath), compressor: dictionary))
                    {
                        bion.Skip();
                    }
                }
            }
        }
Beispiel #2
0
        /// <summary>
        ///  Write a histogram of the word lengths in the dictionary at
        ///  <paramref name="dictionaryPath"/> to the console, followed by the total length,
        ///  the word count, and the average length per word.
        /// </summary>
        /// <remarks>
        ///  Lengths above 128 are all counted in the 128 bucket. Only lengths which occur at
        ///  least once are printed. An empty dictionary reports an average of zero.
        /// </remarks>
        /// <param name="dictionaryPath">Dictionary file to open with WordCompressor.OpenRead.</param>
        public static void DictionaryLengths(string dictionaryPath)
        {
            // Lengths above this value share the last histogram bucket
            const int MaxTrackedLength = 128;

            long total     = 0;
            int  wordCount = 0;

            int[] countForLength = new int[MaxTrackedLength + 1];

            using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
            {
                wordCount = compressor.WordCount;
                for (int wordIndex = 0; wordIndex < wordCount; ++wordIndex)
                {
                    int length = compressor[wordIndex].Word.Length;

                    // The total uses the real length; only the bucket is clamped
                    total += length;

                    if (length > MaxTrackedLength)
                    {
                        length = MaxTrackedLength;
                    }
                    countForLength[length]++;
                }
            }

            for (int length = 0; length < countForLength.Length; ++length)
            {
                int count = countForLength[length];
                if (count > 0)
                {
                    System.Console.WriteLine($"{length} => {count:n0}");
                }
            }

            // Avoid 0 / 0 (which formats as NaN) when the dictionary has no words
            float average = (wordCount == 0 ? 0f : (float)total / (float)wordCount);

            System.Console.WriteLine($"Total {total:n0}b for {wordCount:n0} words. Avg:{average:n2} bytes.");
        }
        /// <summary>
        ///  Convert the JSON file at <paramref name="jsonPath"/> to a BION file at
        ///  <paramref name="bionPath"/>.
        /// </summary>
        /// <remarks>
        ///  Without a dictionary path, the JSON is written directly to <paramref name="bionPath"/>.
        ///  With one, the JSON is first written to a temporary ".preopt.bion" file, which is then
        ///  rewritten in optimized form to <paramref name="bionPath"/> together with ".cdx" and
        ///  ".idx" files next to it. The temporary file is deleted and the dictionary is written
        ///  again afterward.
        /// </remarks>
        /// <param name="jsonPath">JSON file to read.</param>
        /// <param name="bionPath">BION file to create.</param>
        /// <param name="toDictionaryPath">Dictionary file to write; null or empty to convert without a compressor.</param>
        public static void JsonToBion(string jsonPath, string bionPath, string toDictionaryPath = null)
        {
            using (WordCompressor compressor = (String.IsNullOrEmpty(toDictionaryPath) ? null : WordCompressor.OpenWrite(toDictionaryPath)))
            {
                // With a compressor, the first pass goes to a temporary file which is optimized below
                string toPath = (compressor == null ? bionPath : Path.ChangeExtension(bionPath, ".preopt.bion"));

                using (JsonTextReader reader = new JsonTextReader(new StreamReader(jsonPath)))
                    using (BionWriter writer = new BionWriter(File.Create(toPath), compressor: compressor))
                    {
                        JsonToBion(reader, writer);
                    }

                if (compressor != null)
                {
                    string containerIndexPath = Path.ChangeExtension(bionPath, ".cdx");
                    string searchIndexPath    = Path.ChangeExtension(bionPath, ".idx");

                    using (BionReader reader = new BionReader(File.OpenRead(toPath), compressor: compressor))
                        using (BufferedWriter writer = new BufferedWriter(File.Create(bionPath)))
                        {
                            reader.RewriteOptimized(writer, containerIndexPath, searchIndexPath);
                        }

                    File.Delete(toPath);

                    // :/ Rewrite compressor; pre-optimize pass calls Dispose which writes it too early.
                    // File.Create truncates the earlier copy (File.OpenWrite would leave stale trailing
                    // bytes if the dictionary is now shorter), and the using closes the file handle.
                    using (FileStream dictionaryStream = File.Create(toDictionaryPath))
                    {
                        compressor.Write(dictionaryStream);
                    }
                }
            }
        }
Beispiel #4
0
        /// <summary>
        ///  Read every token in <paramref name="filePath"/> and count them, inside a ConsoleWatch
        ///  scope whose completion message reports the count. Paths ending in ".bion" (any casing)
        ///  go through BionReader; everything else goes through JsonTextReader.
        /// </summary>
        /// <param name="filePath">File to read.</param>
        /// <param name="fromDictionaryPath">Dictionary opened with WordCompressor.OpenRead for the BION case; when null, no compressor is passed to the reader.</param>
        private static void Count(string filePath, string fromDictionaryPath)
        {
            VerifyFileExists(filePath);
            VerifyFileExists(fromDictionaryPath);
            long tokenCount = 0;

            using (new ConsoleWatch($"Reading [Count] {filePath} ({FileLength.MB(filePath)})...",
                                    () => $"Done; {tokenCount:n0} tokens found in file"))
            {
                bool isBion = filePath.EndsWith(".bion", StringComparison.OrdinalIgnoreCase);

                if (!isBion)
                {
                    using (JsonTextReader json = new JsonTextReader(new StreamReader(filePath)))
                    {
                        while (json.Read())
                        {
                            ++tokenCount;
                        }
                    }
                }
                else
                {
                    // The compressor stays null when no dictionary path was given
                    WordCompressor dictionary = null;
                    if (fromDictionaryPath != null)
                    {
                        dictionary = WordCompressor.OpenRead(fromDictionaryPath);
                    }

                    using (dictionary)
                    using (BionReader bion = new BionReader(File.OpenRead(filePath), compressor: dictionary))
                    {
                        while (bion.Read())
                        {
                            ++tokenCount;
                        }
                    }
                }
            }
        }
Beispiel #5
0
        /// <summary>
        ///  Write the word lengths from the dictionary at <paramref name="dictionaryPath"/> to an
        ///  integer block file at <paramref name="blockPath"/>. A zero is written first, then one
        ///  value per word.
        /// </summary>
        /// <param name="dictionaryPath">Dictionary file to open with WordCompressor.OpenRead.</param>
        /// <param name="blockPath">File the IntBlockWriter output is written to.</param>
        /// <param name="absolute">True to write the running total of lengths for each word; false to write each word's own length.</param>
        public static void TranslateDictionaryPositions(string dictionaryPath, string blockPath, bool absolute)
        {
            int  wordCount    = 0;
            int  totalLength  = 0;
            long bytesWritten = 0;

            using (new ConsoleWatch($"Translating {dictionaryPath} positions to {blockPath}...",
                                    () => $"{wordCount:n0} words, total length {totalLength:n0}, written to {bytesWritten:n0} bytes ({((float)(8 * bytesWritten) / (float)(wordCount)):n2} bits per position)"))
            {
                using (IntBlockWriter blockWriter = new IntBlockWriter(new BufferedWriter(blockPath)))
                using (WordCompressor dictionary = WordCompressor.OpenRead(dictionaryPath))
                {
                    wordCount = dictionary.WordCount;

                    // A zero always precedes the per-word values
                    blockWriter.Write(0);

                    int wordIndex = 0;
                    while (wordIndex < dictionary.WordCount)
                    {
                        int wordLength = dictionary[wordIndex].Word.Length;
                        totalLength += wordLength;

                        if (absolute)
                        {
                            blockWriter.Write(totalLength);
                        }
                        else
                        {
                            blockWriter.Write(wordLength);
                        }

                        wordIndex++;
                    }

                    System.Console.WriteLine(blockWriter.Stats);
                }

                // Measure the file only after the writer has been disposed
                bytesWritten = new FileInfo(blockPath).Length;
            }
        }
Beispiel #6
0
        /// <summary>
        ///  Run WordCompressor.Compress from <paramref name="fromPath"/> to
        ///  <paramref name="toPath"/> and <paramref name="toDictionaryPath"/> inside a ConsoleWatch
        ///  scope whose completion message reports the file sizes involved.
        /// </summary>
        /// <param name="fromPath">Input file; checked with VerifyFileExists first.</param>
        /// <param name="toPath">Compressed output path.</param>
        /// <param name="toDictionaryPath">Dictionary output path.</param>
        private static void Compress(string fromPath, string toPath, string toDictionaryPath)
        {
            VerifyFileExists(fromPath);

            string startMessage = $"Compressing {fromPath}...";

            using (new ConsoleWatch(
                startMessage,
                () => $"Done. {FileLength.MB(fromPath)} to {FileLength.MB(toPath)} + {FileLength.MB(toDictionaryPath)} dictionary ({FileLength.Percentage(fromPath, toPath, toDictionaryPath)})"))
            {
                WordCompressor.Compress(fromPath, toPath, toDictionaryPath);
            }
        }
Beispiel #7
0
        /// <summary>
        ///  Build a searcher over <paramref name="bionFilePath"/>. The dictionary (".wdx"),
        ///  container index (".cdx") and search index (".idx") are opened from the same path with
        ///  the extension changed, each inside a Memory.Log call.
        /// </summary>
        /// <param name="bionFilePath">Path of the BION file to search.</param>
        /// <param name="runDepth">Value stored in _runDepth.</param>
        public BionSearcher(string bionFilePath, int runDepth)
        {
            _runDepth      = runDepth;
            _termPositions = new long[256];

            _compressor = Memory.Log(
                "Dictionary",
                () => WordCompressor.OpenRead(Path.ChangeExtension(bionFilePath, ".wdx")));

            _containerIndex = Memory.Log(
                "ContainerIndex",
                () => ContainerIndex.OpenRead(Path.ChangeExtension(bionFilePath, ".cdx")));

            _searchIndexReader = Memory.Log(
                "SearchIndex",
                () => new SearchIndexReader(Path.ChangeExtension(bionFilePath, ".idx")));

            // Created last: the reader is handed the container index and compressor opened above
            _bionReader = Memory.Log(
                "BionReader",
                () => new BionReader(File.OpenRead(bionFilePath), containerIndex: _containerIndex, compressor: _compressor));
        }
 /// <summary>
 ///  Convert the BION file at <paramref name="bionPath"/> to indented JSON at
 ///  <paramref name="jsonPath"/>.
 /// </summary>
 /// <param name="bionPath">BION file to read.</param>
 /// <param name="jsonPath">JSON file to write.</param>
 /// <param name="fromDictionaryPath">Dictionary to open with WordCompressor.OpenRead; null or empty to read without a compressor.</param>
 public static void BionToJson(string bionPath, string jsonPath, string fromDictionaryPath = null)
 {
     // The compressor stays null when no dictionary path was given
     WordCompressor dictionary = null;
     if (!String.IsNullOrEmpty(fromDictionaryPath))
     {
         dictionary = WordCompressor.OpenRead(fromDictionaryPath);
     }

     using (dictionary)
     using (BionReader source = new BionReader(File.OpenRead(bionPath), compressor: dictionary))
     using (JsonTextWriter target = new JsonTextWriter(new StreamWriter(jsonPath)))
     {
         target.Formatting = Formatting.Indented;
         BionToJson(source, target);
     }
 }
Beispiel #9
0
        /// <summary>
        ///  Run WordCompressor.Expand from <paramref name="fromPath"/> and
        ///  <paramref name="fromDictionaryPath"/> to <paramref name="toPath"/> inside a
        ///  ConsoleWatch scope whose completion message reports the file sizes involved.
        /// </summary>
        /// <param name="fromPath">Compressed input file; checked with VerifyFileExists first.</param>
        /// <param name="toPath">Expanded output path.</param>
        /// <param name="fromDictionaryPath">Dictionary input file; checked with VerifyFileExists first.</param>
        private static void Expand(string fromPath, string toPath, string fromDictionaryPath)
        {
            VerifyFileExists(fromPath);
            VerifyFileExists(fromDictionaryPath);

            string startMessage = $"Expanding {fromPath}...";

            using (new ConsoleWatch(
                startMessage,
                () => $"Done. {FileLength.MB(fromPath)} + {FileLength.MB(fromDictionaryPath)} dictionary to {FileLength.MB(toPath)}"))
            {
                WordCompressor.Expand(fromPath, toPath, fromDictionaryPath);
            }
        }
Beispiel #10
0
        /// <summary>
        ///  Dispose the writer and then the compressor, if present, and clear both fields so
        ///  that a repeated call does nothing.
        /// </summary>
        public void Dispose()
        {
            _writer?.Dispose();
            _writer = null;

            _compressor?.Dispose();
            _compressor = null;
        }
Beispiel #11
0
        /// <summary>
        ///  Dispose the reader and then the compressor, if present, and clear both fields so
        ///  that a repeated call does nothing.
        /// </summary>
        public void Dispose()
        {
            _reader?.Dispose();
            _reader = null;

            _compressor?.Dispose();
            _compressor = null;
        }
Beispiel #12
0
        /// <summary>
        ///  Dispose the compressor, search index reader, container index and BION reader, in
        ///  that order, skipping any which are null, and clear each field afterward.
        /// </summary>
        public void Dispose()
        {
            if (_compressor != null)
            {
                _compressor.Dispose();
            }
            _compressor = null;

            if (_searchIndexReader != null)
            {
                _searchIndexReader.Dispose();
            }
            _searchIndexReader = null;

            if (_containerIndex != null)
            {
                _containerIndex.Dispose();
            }
            _containerIndex = null;

            if (_bionReader != null)
            {
                _bionReader.Dispose();
            }
            _bionReader = null;
        }
Beispiel #13
0
        /// <summary>
        ///  Write words from the dictionary at <paramref name="dictionaryPath"/> whose length is
        ///  exactly <paramref name="length"/> to the console, stopping after 100 words.
        /// </summary>
        /// <param name="dictionaryPath">Dictionary file to open with WordCompressor.OpenRead.</param>
        /// <param name="length">Word length to match.</param>
        public static void WriteWordsForLength(string dictionaryPath, int length)
        {
            int written = 0;

            using (WordCompressor dictionary = WordCompressor.OpenRead(dictionaryPath))
            {
                int wordIndex = 0;

                // Stop at the end of the dictionary or once 100 matches have been written
                while (written < 100 && wordIndex < dictionary.WordCount)
                {
                    String8 candidate = dictionary[wordIndex].Word;
                    wordIndex++;

                    if (candidate.Length != length)
                    {
                        continue;
                    }

                    System.Console.WriteLine(candidate);
                    written++;
                }
            }
        }
Beispiel #14
0
        /// <summary>
        ///  Convert Medium.json to BION with a dictionary, then walk every entry in the container
        ///  index written beside it. For each entry: seek to the start offset, check that a
        ///  container start token is read there, read until the depth returns to its value before
        ///  the start, and check that the matching end token was read at the entry's end offset.
        /// </summary>
        public void ContainerIndex_EndToEnd()
        {
            string jsonFilePath   = @"Content\Medium.json";
            string bionFilePath   = Path.ChangeExtension(jsonFilePath, ".bion");
            string dictionaryPath = Path.ChangeExtension(bionFilePath, "dict.bion");

            JsonBionConverter.JsonToBion(jsonFilePath, bionFilePath, dictionaryPath);

            using (WordCompressor compressor = WordCompressor.OpenRead(dictionaryPath))
                using (ContainerIndex cIndex = ContainerIndex.OpenRead(Path.ChangeExtension(bionFilePath, ".cdx")))
                    using (BionReader reader = new BionReader(File.OpenRead(bionFilePath), cIndex, compressor))
                    {
                        for (int i = 0; i < cIndex.Count; ++i)
                        {
                            ContainerEntry container = cIndex[i];

                            // Seek to container start
                            reader.Seek(container.StartByteOffset);

                            // Verify a container start is there
                            int depth = reader.Depth;
                            Assert.IsTrue(reader.Read(), $"No token could be read at the start of container {i}.");

                            bool isObject = (reader.TokenType == BionToken.StartObject);
                            Assert.AreEqual((isObject ? BionToken.StartObject : BionToken.StartArray), reader.TokenType);

                            // Read until the depth is back to the same value
                            while (reader.Depth != depth)
                            {
                                // Fail instead of looping forever if the content ends before the container closes
                                Assert.IsTrue(reader.Read(), $"Content ended before container {i} was closed.");
                            }

                            // Verify this is the end container position
                            Assert.AreEqual((isObject ? BionToken.EndObject : BionToken.EndArray), reader.TokenType);
                            Assert.AreEqual(container.EndByteOffset, reader.BytesRead);
                        }
                    }
        }
Beispiel #15
0
        /// <summary>
        ///  Compress and expand Medium.json twice, first without and then with optimization.
        ///  Each pass checks that the expanded file equals the original and that the compressed
        ///  file is under half the original's size.
        /// </summary>
        public void WordCompressor_RoundTrip()
        {
            string originalPath   = @"Content\Medium.json";
            string compressedPath = "Medium.compressed.bin";
            string dictionaryPath = "Medium.compressed.dict";
            string comparePath    = "Medium.roundtrip.json";

            // First pass without optimization, second pass with it
            bool[] optimizeSettings = { false, true };

            for (int pass = 0; pass < optimizeSettings.Length; ++pass)
            {
                bool optimize = optimizeSettings[pass];

                // Roundtrip and verify the files are equal
                WordCompressor.Compress(originalPath, compressedPath, dictionaryPath, optimize);
                WordCompressor.Expand(compressedPath, comparePath, dictionaryPath);
                Verify.FilesEqual(originalPath, comparePath);
                Verify.SizeRatioUnder(originalPath, compressedPath, 0.5f);

                // Remove the outputs before the next pass; the last pass leaves its outputs in place
                bool isLastPass = (pass == optimizeSettings.Length - 1);
                if (!isLastPass)
                {
                    File.Delete(compressedPath);
                    File.Delete(dictionaryPath);
                    File.Delete(comparePath);
                }
            }
        }
Beispiel #16
0
 /// <summary>
 ///  Create a BionWriter which stores the given writer, container index and compressor.
 /// </summary>
 /// <param name="writer">BufferedWriter stored in _writer.</param>
 /// <param name="containerIndex">Container index stored in _containerIndex; defaults to null.</param>
 /// <param name="compressor">Compressor stored in _compressor; defaults to null.</param>
 public BionWriter(BufferedWriter writer, ContainerIndex containerIndex = null, WordCompressor compressor = null)
 {
     _compressor = compressor;
     _containerIndex = containerIndex;
     _writer = writer;
 }
Beispiel #17
0
 /// <summary>
 ///  Create a BionWriter over a Stream by wrapping it in a new BufferedWriter and delegating
 ///  to the BufferedWriter-based constructor.
 /// </summary>
 /// <param name="stream">Stream to wrap.</param>
 /// <param name="containerIndex">Container index to pass along; defaults to null.</param>
 /// <param name="compressor">Compressor to pass along; defaults to null.</param>
 public BionWriter(Stream stream, ContainerIndex containerIndex = null, WordCompressor compressor = null)
     : this(new BufferedWriter(stream), containerIndex, compressor)
 { }
Beispiel #18
0
        /// <summary>
        ///  Click handler: creates a new WordCompressor and calls Solve() on it.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void btnCompress_Click(object sender, EventArgs e)
        {
            new WordCompressor().Solve();
        }
Beispiel #19
0
 /// <summary>
 ///  Create a BionReader which stores the given reader, container index and compressor.
 /// </summary>
 /// <param name="reader">BufferedReader stored in _reader.</param>
 /// <param name="containerIndex">Container index stored in _containerIndex; defaults to null.</param>
 /// <param name="compressor">Compressor stored in _compressor; defaults to null.</param>
 public BionReader(BufferedReader reader, ContainerIndex containerIndex = null, WordCompressor compressor = null)
 {
     _containerIndex = containerIndex;
     _compressor = compressor;
     _reader = reader;
 }
Beispiel #20
0
        /// <summary>
        ///  Read every token in <paramref name="filePath"/>, fetching the value of each property
        ///  name, string, integer and float token, and count the tokens. The work runs inside a
        ///  ConsoleWatch scope whose completion message reports the count. Paths ending in
        ///  ".bion" (any casing) go through BionReader; everything else goes through JsonTextReader.
        /// </summary>
        /// <param name="filePath">File to read.</param>
        /// <param name="fromDictionaryPath">Dictionary opened with WordCompressor.OpenRead for the BION case; when null, no compressor is passed to the reader.</param>
        private static void Read(string filePath, string fromDictionaryPath)
        {
            VerifyFileExists(filePath);
            VerifyFileExists(fromDictionaryPath);
            long tokenCount = 0;

            using (new ConsoleWatch($"Reading [Full] {filePath} ({FileLength.MB(filePath)})...",
                                    () => $"Done; {tokenCount:n0} tokens found in file"))
            {
                bool isBion = filePath.EndsWith(".bion", StringComparison.OrdinalIgnoreCase);

                if (!isBion)
                {
                    using (JsonTextReader json = new JsonTextReader(new StreamReader(filePath)))
                    {
                        while (json.Read())
                        {
                            ++tokenCount;

                            // The values are fetched but intentionally not used afterward
                            JsonToken jsonType = json.TokenType;
                            if (jsonType == JsonToken.PropertyName || jsonType == JsonToken.String)
                            {
                                string text = (string)json.Value;
                            }
                            else if (jsonType == JsonToken.Integer)
                            {
                                long integer = (long)json.Value;
                            }
                            else if (jsonType == JsonToken.Float)
                            {
                                double number = (double)json.Value;
                            }
                        }
                    }
                }
                else
                {
                    // The compressor stays null when no dictionary path was given
                    WordCompressor dictionary = null;
                    if (fromDictionaryPath != null)
                    {
                        dictionary = WordCompressor.OpenRead(fromDictionaryPath);
                    }

                    using (dictionary)
                    using (BionReader bion = new BionReader(File.OpenRead(filePath), compressor: dictionary))
                    {
                        while (bion.Read())
                        {
                            ++tokenCount;

                            // The values are fetched but intentionally not used afterward
                            BionToken bionType = bion.TokenType;
                            if (bionType == BionToken.PropertyName || bionType == BionToken.String)
                            {
                                // Fetched as String8; the CurrentString() call is left disabled
                                String8 text8 = bion.CurrentString8();
                            }
                            else if (bionType == BionToken.Integer)
                            {
                                long integer = bion.CurrentInteger();
                            }
                            else if (bionType == BionToken.Float)
                            {
                                double number = bion.CurrentFloat();
                            }
                        }
                    }
                }
            }
        }