Пример #1
0
        /// <summary>
        /// Builds an integer-valued FST from four keys sharing common prefixes and
        /// runs <c>Verify</c> on the built instance, on a copy restored with
        /// <c>FromBytes</c>, and on a copy restored with <c>FromBytesCompressed</c>.
        /// </summary>
        public void ConstructionWithIntOutputs()
        {
            string[] keys = { "a", "ab", "abilities", "ability" };
            int[] values = { 4, 3134, 7488, 1580 };

            // Freshly built FST.
            var built = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(keys, values);
            Verify(built, keys, values);

            // Round trip through the plain byte representation.
            var plainBytes = built.GetBytes();
            var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
            Verify(fromPlain, keys, values);

            // Round trip through the compressed byte representation.
            var packedBytes = built.GetBytesCompressed();
            var fromPacked = FST<int>.FromBytesCompressed(packedBytes, FSTVarIntOutput.Instance);
            Verify(fromPacked, keys, values);
        }
Пример #2
0
        /// <summary>
        /// Builds a string-valued FST from six phrases and runs <c>Verify</c> on the
        /// built instance and on the copies restored from its plain and compressed
        /// byte representations.
        /// </summary>
        public void SimpleConstructionWithStringOutputs()
        {
            string[] phrases =
            {
                "a banana", "a lemon", "a mandarine",
                "a mango", "an apple", "an orange",
            };
            string[] labels =
            {
                "one", "two", "three",
                "minusone", "minustwo", "minuseight",
            };

            // Freshly built FST.
            var built = new FSTBuilder<string>(FSTStringOutput.Instance).FromList(phrases, labels);
            Verify(built, phrases, labels);

            // Round trip through the plain byte representation.
            var plainBytes = built.GetBytes();
            var fromPlain = FST<string>.FromBytes(plainBytes, FSTStringOutput.Instance);
            Verify(fromPlain, phrases, labels);

            // Round trip through the compressed byte representation.
            var packedBytes = built.GetBytesCompressed();
            var fromPacked = FST<string>.FromBytesCompressed(packedBytes, FSTStringOutput.Instance);
            Verify(fromPacked, phrases, labels);
        }
Пример #3
0
        /// <summary>
        /// Builds an integer-valued FST from six phrases (one output is negative) and
        /// runs <c>Verify</c> on the built instance and on the copies restored from
        /// its plain and compressed byte representations.
        /// </summary>
        public void SimpleConstructionWithIntOutputs()
        {
            string[] phrases =
            {
                "a banana", "a lemon", "a mandarine",
                "a mango", "an apple", "an orange",
            };
            int[] scores = { 1, 2, 3, -2, 15, 8 };

            // Freshly built FST.
            var built = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(phrases, scores);
            Verify(built, phrases, scores);

            // Round trip through the plain byte representation.
            var plainBytes = built.GetBytes();
            var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
            Verify(fromPlain, phrases, scores);

            // Round trip through the compressed byte representation.
            var packedBytes = built.GetBytesCompressed();
            var fromPacked = FST<int>.FromBytesCompressed(packedBytes, FSTVarIntOutput.Instance);
            Verify(fromPacked, phrases, scores);
        }
 /// <summary>
 /// Creates the dictionary over the given storage. When the storage already
 /// holds data, its whole content is read into memory and decoded as a
 /// compressed integer-valued FST.
 /// </summary>
 /// <param name="storage">Backing storage; its full content is read when non-empty.</param>
 /// <param name="maxTokenLength">Token length passed to <c>encoding.GetMaxEncodedLength</c> to size the byte limit.</param>
 /// <param name="encoding">Text encoding kept for later use and used to compute the maximum encoded token length.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="storage"/> or <paramref name="encoding"/> is null.
 /// </exception>
 public PersistentDictionaryFst(IPersistentStorage storage,
                                int maxTokenLength,
                                ITextEncoding encoding)
 {
     // Both arguments are dereferenced below; fail with a descriptive
     // exception instead of a bare NullReferenceException.
     if (storage == null)
     {
         throw new System.ArgumentNullException(nameof(storage));
     }
     if (encoding == null)
     {
         throw new System.ArgumentNullException(nameof(encoding));
     }

     this.maxTokenByteLength = encoding.GetMaxEncodedLength(maxTokenLength);
     this.storage            = storage;
     this.encoding           = encoding;

     // Empty storage means nothing to load yet; the fst field keeps its default.
     if (storage.Length > 0)
     {
         var buffer = new byte[storage.Length];
         storage.ReadAll(0, buffer, 0, buffer.Length);
         this.fst = FST <int> .FromBytesCompressed(buffer, FSTVarIntOutput.Instance);
     }
 }
Пример #5
0
        /// <summary>
        /// Loads an FST from <c>opts.InputFile</c> and writes every term matching
        /// <c>opts.Pattern</c>, together with its value, to the console.
        /// </summary>
        /// <remarks>
        /// The byte at index 6 of the file selects the decoder: <c>'D'</c> uses
        /// <c>FromBytes</c>, <c>'C'</c> uses <c>FromBytesCompressed</c>. The FST
        /// payload is everything from index 7 onwards.
        /// </remarks>
        /// <param name="opts">Print options; <c>InputFile</c> and <c>Pattern</c> are read.</param>
        /// <returns>Always 0.</returns>
        /// <exception cref="NotSupportedException">
        /// The file is too short to contain the format marker, or the marker is
        /// neither <c>'D'</c> nor <c>'C'</c>.
        /// </exception>
        /// <exception cref="Exception">
        /// A term produced by the wildcard match cannot be looked up again.
        /// </exception>
        private static int DoPrint(PrintOptions opts)
        {
            const int FormatMarkerIndex = 6;
            const int HeaderLength      = FormatMarkerIndex + 1;

            var timer = Stopwatch.StartNew();

            var bytes = File.ReadAllBytes(opts.InputFile);

            // A file without a full header cannot carry a format marker; report it
            // as bad input rather than failing with IndexOutOfRangeException.
            if (bytes.Length < HeaderLength)
            {
                throw new NotSupportedException("FST format is not supported or input is not correct");
            }

            // Copy the payload that follows the header in one block.
            var fstBytes = new byte[bytes.Length - HeaderLength];
            Array.Copy(bytes, HeaderLength, fstBytes, 0, fstBytes.Length);

            var fst = default(FST <int>);

            if (bytes[FormatMarkerIndex] == 'D')
            {
                fst = FST <int> .FromBytes(fstBytes, outputType);
            }
            else if (bytes[FormatMarkerIndex] == 'C')
            {
                fst = FST <int> .FromBytesCompressed(fstBytes, outputType);
            }
            else
            {
                throw new NotSupportedException("FST format is not supported or input is not correct");
            }
            PrintConsole(ConsoleColor.White, $"FST read from: {opts.InputFile}, time: {timer.Elapsed}");

            timer.Restart();
            var terms = 0;

            foreach (var term in fst.Match(new WildcardMatcher(opts.Pattern, 255)))
            {
                // Every enumerated term is expected to resolve to a value;
                // a miss here is treated as an internal inconsistency.
                if (!fst.TryMatch(term, out int value))
                {
                    throw new Exception("This is a bug");
                }

                ++terms;
                Console.WriteLine($"{term}->{value}");
            }
            PrintConsole(ConsoleColor.White, $"FST print terms: {terms}, time: {timer.Elapsed}");

            return(0);
        }
        /// <summary>
        /// Builds an integer-valued FST from eight airport and heliport names and
        /// runs <c>Verify</c> on the built instance and on the copies restored from
        /// its plain and compressed byte representations.
        /// </summary>
        public void AcceptanceTestWithIntegerOutput()
        {
            string[] names =
            {
                "Albert Schweitzer Ziekenhuis. Locatie Amstelwijck Heliport",
                "Amsterdam Airfield",
                "Amsterdam Airport",
                "Amsterdam Airport Schiphol",
                "Amsterdam Heliport",
                "Chafei Amsei Airport",
                "New Amsterdam Airport",
                "Schwarzheide/Schipkau Airport",
            };
            int[] ids = { 43711, 23465, 41198, 2513, 43207, 5873, 41521, 29065 };

            // Freshly built FST.
            var built = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(names, ids);
            Verify(built, names, ids);

            // Round trip through the plain byte representation.
            var plainBytes = built.GetBytes();
            var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
            Verify(fromPlain, names, ids);

            // Round trip through the compressed byte representation.
            var packedBytes = built.GetBytesCompressed();
            var fromPacked = FST<int>.FromBytesCompressed(packedBytes, FSTVarIntOutput.Instance);
            Verify(fromPacked, names, ids);
        }
Пример #7
0
        /// <summary>
        /// Builds an FST into <c>opts.OutputFile</c> from the terms yielded by
        /// <c>ParseFromOptions</c>, then verifies the result twice: once fully in
        /// memory (only when the output is smaller than 64 * 1024 * 1024 bytes)
        /// and once through <c>PersistentFST</c> over the file.
        /// </summary>
        /// <param name="opts">Build options; <c>OutputFile</c> and <c>CacheSize</c> are read here.</param>
        /// <returns>Always 0.</returns>
        /// <exception cref="Exception">
        /// A term is missing from the built FST or maps to a different score.
        /// </exception>
        private static int DoBuild(BuildOptions opts)
        {
            var timer = Stopwatch.StartNew();

            // Remove any existing output file before building.
            if (File.Exists(opts.OutputFile))
            {
                File.Delete(opts.OutputFile);
            }

            // Restart so the reported construction time excludes the cleanup above.
            timer.Restart();
            var terms = 0;

            using (var outputFile = new FileStorage(opts.OutputFile))
            {
                using (var fstBuilder = new FSTBuilder <int>(outputType, opts.CacheSize, outputFile))
                {
                    fstBuilder.Begin();
                    foreach (var(term, score) in ParseFromOptions(opts))
                    {
                        fstBuilder.Add(term, score);
                        ++terms;
                    }
                    fstBuilder.End();

                    // Process is IDisposable: read the working set and release the
                    // handle straight away instead of leaving it to the finalizer.
                    long workingSet;
                    using (var currentProcess = Process.GetCurrentProcess())
                    {
                        workingSet = currentProcess.WorkingSet64;
                    }

                    PrintConsole(ConsoleColor.White, $"FST constructed time: {timer.Elapsed}, terms: {terms}, cache size: {opts.CacheSize}, Memory: {workingSet}, output size: {outputFile.Length}");
                }
            }

            // In-memory verification, skipped for outputs of 64 MiB or more.
            using (var outputFile = new FileStorage(opts.OutputFile))
            {
                if (outputFile.Length < 64 * 1024 * 1024)
                {
                    timer.Restart();
                    var data = new byte[outputFile.Length];
                    outputFile.ReadAll(0, data, 0, data.Length);
                    var fst = FST <int> .FromBytesCompressed(data, outputType);

                    foreach (var(term, score) in ParseFromOptions(opts))
                    {
                        if (!fst.TryMatch(term, out var value) || value != score)
                        {
                            throw new Exception($"Bug at term {term}: {value} != {score}");
                        }
                    }
                    PrintConsole(ConsoleColor.White, $"FST (memory) verification time: {timer.Elapsed}");
                }
            }


            // File-backed verification through PersistentFST.
            timer.Restart();
            using (var outputFile = new FileStorage(opts.OutputFile))
            {
                using (var fst = new PersistentFST <int>(outputType, outputFile))
                {
                    foreach (var(term, score) in ParseFromOptions(opts))
                    {
                        if (!fst.TryMatch(term, out var value) || value != score)
                        {
                            throw new Exception($"Bug at term {term}: {value} != {score}");
                        }
                    }
                }
            }
            PrintConsole(ConsoleColor.White, $"FST (file)   verification time: {timer.Elapsed}");

            return(0);
        }