Exemple #1
0
        /// <summary>
        /// Builds an FST with string outputs from six terms and hands it to <c>Verify</c>
        /// three times: as built, after a GetBytes/FromBytes round trip, and after a
        /// GetBytesCompressed/FromBytesCompressed round trip.
        /// </summary>
        public void SimpleConstructionWithStringOutputs()
        {
            var terms = new[] { "a banana", "a lemon", "a mandarine", "a mango", "an apple", "an orange" };
            var expected = new[] { "one", "two", "three", "minusone", "minustwo", "minuseight" };

            // Freshly built automaton.
            var built = new FSTBuilder<string>(FSTStringOutput.Instance).FromList(terms, expected);
            Verify(built, terms, expected);

            // Same automaton restored from its plain byte form.
            var plainBytes = built.GetBytes();
            var fromPlain = FST<string>.FromBytes(plainBytes, FSTStringOutput.Instance);
            Verify(fromPlain, terms, expected);

            // Same automaton restored from its compressed byte form.
            var packedBytes = built.GetBytesCompressed();
            var fromPacked = FST<string>.FromBytesCompressed(packedBytes, FSTStringOutput.Instance);
            Verify(fromPacked, terms, expected);
        }
Exemple #2
0
        /// <summary>
        /// Builds an FST with integer outputs (including a negative value) from six terms
        /// and hands it to <c>Verify</c> three times: as built, after a GetBytes/FromBytes
        /// round trip, and after a GetBytesCompressed/FromBytesCompressed round trip.
        /// </summary>
        public void SimpleConstructionWithIntOutputs()
        {
            var terms = new[] { "a banana", "a lemon", "a mandarine", "a mango", "an apple", "an orange" };
            var expected = new[] { 1, 2, 3, -2, 15, 8 };

            // Freshly built automaton.
            var built = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(terms, expected);
            Verify(built, terms, expected);

            // Same automaton restored from its plain byte form.
            var plainBytes = built.GetBytes();
            var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
            Verify(fromPlain, terms, expected);

            // Same automaton restored from its compressed byte form.
            var packedBytes = built.GetBytesCompressed();
            var fromPacked = FST<int>.FromBytesCompressed(packedBytes, FSTVarIntOutput.Instance);
            Verify(fromPacked, terms, expected);
        }
Exemple #3
0
        /// <summary>
        /// Builds an FST with integer outputs from four terms that share prefixes
        /// ("a", "ab", "abilities", "ability") and hands it to <c>Verify</c> three times:
        /// as built, after a GetBytes/FromBytes round trip, and after a
        /// GetBytesCompressed/FromBytesCompressed round trip.
        /// </summary>
        public void ConstructionWithIntOutputs()
        {
            var terms = new[] { "a", "ab", "abilities", "ability" };
            var expected = new[] { 4, 3134, 7488, 1580 };

            // Freshly built automaton.
            var built = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(terms, expected);
            Verify(built, terms, expected);

            // Same automaton restored from its plain byte form.
            var plainBytes = built.GetBytes();
            var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
            Verify(fromPlain, terms, expected);

            // Same automaton restored from its compressed byte form.
            var packedBytes = built.GetBytesCompressed();
            var fromPacked = FST<int>.FromBytesCompressed(packedBytes, FSTVarIntOutput.Instance);
            Verify(fromPacked, terms, expected);
        }
Exemple #4
0
        /// <summary>
        /// Builds an FST over eight airport names and checks that matching the wildcard
        /// pattern "*Schip*" yields exactly the two expected terms, in the expected order,
        /// each resolving to its expected output via <c>TryMatch</c>.
        /// </summary>
        public void WildcardMatchingTest()
        {
            var inputs = new string[] {
                "Albert Schweitzer Ziekenhuis. Locatie Amstelwijck Heliport",
                "Amsterdam Airfield",
                "Amsterdam Airport",
                "Amsterdam Airport Schiphol",
                "Amsterdam Heliport",
                "Chafei Amsei Airport",
                "New Amsterdam Airport",
                "Schwarzheide/Schipkau Airport"
            };

            var outputs = new int[] {
                43711,
                23465,
                41198,
                2513,
                43207,
                5873,
                41521,
                29065
            };

            var fst = new FSTBuilder <int>(FSTVarIntOutput.Instance).FromList(inputs, outputs);

            Verify(fst, inputs, outputs);

            var expectedTerms = new string[]
            {
                "Amsterdam Airport Schiphol",
                "Schwarzheide/Schipkau Airport"
            };

            var expectedOutputs = new int[]
            {
                2513,
                29065
            };

            var expectedIndex = 0;

            // Sanity check on the fixture itself: both expectation arrays must line up.
            Assert.Equal(expectedOutputs.Length, expectedTerms.Length);

            foreach (var term in fst.Match(new WildcardMatcher("*Schip*", 255)))
            {
                // Without this guard an unexpected extra match would surface as an
                // IndexOutOfRangeException instead of a readable assertion failure.
                Assert.True(expectedIndex < expectedTerms.Length, $"Unexpected extra match: {term}");

                Assert.Equal(expectedTerms[expectedIndex], term);
                Assert.True(fst.TryMatch(term, out int value));
                Assert.Equal(expectedOutputs[expectedIndex], value);
                ++expectedIndex;
            }

            // Every expected term must have been produced (catches missing matches).
            Assert.Equal(expectedOutputs.Length, expectedIndex);
        }
 /// <summary>
 /// When <c>input</c> is still set, builds an FST from the collected input/output
 /// lists, passes it to <c>Validate</c>, and writes its compressed bytes to
 /// <c>storage</c>. Both lists are then cleared, so a later call does nothing.
 /// </summary>
 public void Dispose()
 {
     // Nothing pending (or already disposed).
     if (input == null)
     {
         return;
     }

     var builder = new FSTBuilder<int>(FSTVarIntOutput.Instance);
     var fst = builder.FromList(input.ToArray(), output.ToArray());
     Validate(fst); //TODO: Optional

     var compressed = fst.GetBytesCompressed();
     storage.WriteAll(0, compressed, 0, compressed.Length);

     input = null;
     output = null;
 }
        /// <summary>
        /// Builds an FST with integer outputs over eight airport names and hands it to
        /// <c>Verify</c> three times: as built, after a GetBytes/FromBytes round trip, and
        /// after a GetBytesCompressed/FromBytesCompressed round trip.
        /// </summary>
        public void AcceptanceTestWithIntegerOutput()
        {
            var terms = new[]
            {
                "Albert Schweitzer Ziekenhuis. Locatie Amstelwijck Heliport",
                "Amsterdam Airfield",
                "Amsterdam Airport",
                "Amsterdam Airport Schiphol",
                "Amsterdam Heliport",
                "Chafei Amsei Airport",
                "New Amsterdam Airport",
                "Schwarzheide/Schipkau Airport"
            };

            var expected = new[] { 43711, 23465, 41198, 2513, 43207, 5873, 41521, 29065 };

            // Freshly built automaton.
            var built = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(terms, expected);
            Verify(built, terms, expected);

            // Same automaton restored from its plain byte form.
            var plainBytes = built.GetBytes();
            var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
            Verify(fromPlain, terms, expected);

            // Same automaton restored from its compressed byte form.
            var packedBytes = built.GetBytesCompressed();
            var fromPacked = FST<int>.FromBytesCompressed(packedBytes, FSTVarIntOutput.Instance);
            Verify(fromPacked, terms, expected);
        }
Exemple #7
0
        /// <summary>
        /// Feeds the (term, score) pairs produced by <c>ParseFromOptions</c> into an
        /// <c>FSTBuilder</c> backed by the output file, then reopens that file twice to check
        /// every pair: once loaded fully into memory (only when the file is smaller than
        /// 64 * 1024 * 1024 bytes) and once through <c>PersistentFST</c>.
        /// </summary>
        /// <param name="opts">Build options; <c>OutputFile</c> and <c>CacheSize</c> are read here and the whole object is passed to <c>ParseFromOptions</c>.</param>
        /// <returns>Always 0; a verification mismatch throws instead of returning.</returns>
        private static int DoBuild(BuildOptions opts)
        {
            var stopwatch = Stopwatch.StartNew();

            // Remove any previous output file before building.
            if (File.Exists(opts.OutputFile))
            {
                File.Delete(opts.OutputFile);
            }

            // Phase 1: construct the FST directly into the output file.
            stopwatch.Restart();
            var termCount = 0;

            using (var storage = new FileStorage(opts.OutputFile))
            using (var builder = new FSTBuilder<int>(outputType, opts.CacheSize, storage))
            {
                builder.Begin();
                foreach (var (term, score) in ParseFromOptions(opts))
                {
                    builder.Add(term, score);
                    termCount++;
                }
                builder.End();
                PrintConsole(ConsoleColor.White, $"FST constructed time: {stopwatch.Elapsed}, terms: {termCount}, cache size: {opts.CacheSize}, Memory: {Process.GetCurrentProcess().WorkingSet64}, output size: {storage.Length}");
            }

            // Phase 2: in-memory verification, skipped for files at or above the size limit.
            const int inMemoryLimit = 64 * 1024 * 1024;

            using (var storage = new FileStorage(opts.OutputFile))
            {
                if (storage.Length < inMemoryLimit)
                {
                    stopwatch.Restart();
                    var buffer = new byte[storage.Length];
                    storage.ReadAll(0, buffer, 0, buffer.Length);
                    var fst = FST<int>.FromBytesCompressed(buffer, outputType);

                    foreach (var (term, score) in ParseFromOptions(opts))
                    {
                        var found = fst.TryMatch(term, out var value);
                        if (!found || value != score)
                        {
                            throw new Exception($"Bug at term {term}: {value} != {score}");
                        }
                    }
                    PrintConsole(ConsoleColor.White, $"FST (memory) verification time: {stopwatch.Elapsed}");
                }
            }

            // Phase 3: verification against the file-backed FST.
            stopwatch.Restart();
            using (var storage = new FileStorage(opts.OutputFile))
            using (var fst = new PersistentFST<int>(outputType, storage))
            {
                foreach (var (term, score) in ParseFromOptions(opts))
                {
                    var found = fst.TryMatch(term, out var value);
                    if (!found || value != score)
                    {
                        throw new Exception($"Bug at term {term}: {value} != {score}");
                    }
                }
            }
            PrintConsole(ConsoleColor.White, $"FST (file)   verification time: {stopwatch.Elapsed}");

            return 0;
        }
Exemple #8
0
        /// <summary>
        /// Reads "term->output" lines from the input file, builds an in-memory FST from them,
        /// checks every term against the built FST and writes the FST to the output file,
        /// prefixed with a 7-byte header ("FST-01" followed by a format tag).
        /// </summary>
        /// <param name="opts">Build options; <c>InputFile</c>, <c>OutputFile</c> and <c>Format</c> are read.</param>
        /// <returns>Always 0; failures are reported by throwing.</returns>
        /// <exception cref="NotImplementedException"><c>opts.Format</c> is "Dot".</exception>
        /// <exception cref="NotSupportedException"><c>opts.Format</c> is not "Default", "Compressed" or "Dot".</exception>
        private static int DoBuild(BuildOptions opts)
        {
            var timer   = Stopwatch.StartNew();
            // Lines are sorted by term using ordinal comparison before being handed to the builder.
            var input   = File.ReadAllLines(opts.InputFile).OrderBy(x => x.Split("->")[0], StringComparer.Ordinal).ToArray();
            var terms   = new string[input.Length];
            var outputs = new int[input.Length];

            for (int i = 0; i < input.Length; ++i)
            {
                var s = input[i].Split("->");
                terms[i]   = s[0];
                // The input is machine-readable data, so parse independently of the current culture.
                outputs[i] = int.Parse(s[1], System.Globalization.CultureInfo.InvariantCulture);
            }
            PrintConsole(ConsoleColor.White, $"Input read term: {terms.Length}, time: {timer.Elapsed}");

            timer.Restart();
            var fst = new FSTBuilder <int>(outputType).FromList(terms, outputs);

            PrintConsole(ConsoleColor.White, $"FST constructed time: {timer.Elapsed}");

            timer.Restart();
            for (int i = 0; i < terms.Length; ++i)
            {
                if (!fst.TryMatch(terms[i], out var value) || value != outputs[i])
                {
                    throw new Exception($"Bug at term {terms[i]}: {value} != {outputs[i]}");
                }
            }
            PrintConsole(ConsoleColor.White, $"FST verification time: {timer.Elapsed}");

            // Writes the "FST-01" magic, the one-byte format tag and then the payload to the
            // output file; returns the total number of bytes written.
            int WriteWithHeader(char formatTag, byte[] payload)
            {
                const string magic = "FST-01";
                var headerLength = magic.Length + 1;
                var data = new byte[headerLength + payload.Length];
                for (int k = 0; k < magic.Length; ++k)
                {
                    data[k] = (byte)magic[k];
                }
                data[magic.Length] = (byte)formatTag;
                Array.Copy(payload, 0, data, headerLength, payload.Length);
                File.WriteAllBytes(opts.OutputFile, data);
                return data.Length;
            }

            var size = 0;

            timer.Restart();
            if (opts.Format == "Default")
            {
                size = WriteWithHeader('D', fst.GetBytes());
            }
            else if (opts.Format == "Compressed")
            {
                size = WriteWithHeader('C', fst.GetBytesCompressed());
            }
            else if (opts.Format == "Dot")
            {
                throw new NotImplementedException();
            }
            else
            {
                // Previously an unknown format wrote nothing yet still reported success below.
                throw new NotSupportedException($"Unsupported output format: {opts.Format}");
            }
            PrintConsole(ConsoleColor.White, $"FST written to the output file: {opts.OutputFile}, size: {size}, time: {timer.Elapsed}");

            return(0);
        }