예제 #1
0
        /// <summary>
        /// Builds an FST from a fixed, sorted list of airport names and checks that
        /// enumerating it with the "*Schip*" wildcard yields exactly the expected terms,
        /// in order, each resolving to its expected output value.
        /// </summary>
        public void WildcardMatchingTest()
        {
            // Terms and their associated outputs; entry i of one array pairs with entry i of the other.
            var inputs = new[]
            {
                "Albert Schweitzer Ziekenhuis. Locatie Amstelwijck Heliport",
                "Amsterdam Airfield",
                "Amsterdam Airport",
                "Amsterdam Airport Schiphol",
                "Amsterdam Heliport",
                "Chafei Amsei Airport",
                "New Amsterdam Airport",
                "Schwarzheide/Schipkau Airport"
            };
            var outputs = new[] { 43711, 23465, 41198, 2513, 43207, 5873, 41521, 29065 };

            var fst = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(inputs, outputs);

            Verify(fst, inputs, outputs);

            // The only two terms containing "Schip", with the outputs they were registered under.
            var expectedTerms   = new[] { "Amsterdam Airport Schiphol", "Schwarzheide/Schipkau Airport" };
            var expectedOutputs = new[] { 2513, 29065 };

            Assert.Equal(expectedOutputs.Length, expectedTerms.Length);

            var matcher    = new WildcardMatcher("*Schip*", 255);
            var matchCount = 0;

            foreach (var matchedTerm in fst.Match(matcher))
            {
                var wantedTerm   = expectedTerms[matchCount];
                var wantedOutput = expectedOutputs[matchCount];

                Assert.Equal(wantedTerm, matchedTerm);
                Assert.True(fst.TryMatch(matchedTerm, out var actualOutput));
                Assert.Equal(wantedOutput, actualOutput);

                matchCount++;
            }

            // Every expected term must have been produced by the matcher.
            Assert.Equal(expectedOutputs.Length, matchCount);
        }
예제 #2
0
        /// <summary>
        /// Reads "term->output" lines from <c>opts.InputFile</c>, builds an FST from them,
        /// verifies that every term maps back to its output, and writes the FST to
        /// <c>opts.OutputFile</c> in the format named by <c>opts.Format</c>.
        /// </summary>
        /// <param name="opts">Build options supplying the input file, output file and format.</param>
        /// <returns>0 when the FST was built, verified and written.</returns>
        /// <exception cref="FormatException">An input line is not of the form "term->integer".</exception>
        /// <exception cref="InvalidOperationException">The built FST does not return the expected output for a term.</exception>
        /// <exception cref="NotImplementedException">The "Dot" format was requested.</exception>
        /// <exception cref="ArgumentException">The requested format is not recognised.</exception>
        private static int DoBuild(BuildOptions opts)
        {
            var timer = Stopwatch.StartNew();

            // Parse every line once, then sort by term using ordinal comparison
            // (the same ordering the original applied to the raw lines).
            var entries = File.ReadAllLines(opts.InputFile)
                .Select((line, index) => ParseInputLine(line, index + 1))
                .OrderBy(entry => entry.Term, StringComparer.Ordinal)
                .ToArray();
            var terms   = Array.ConvertAll(entries, entry => entry.Term);
            var outputs = Array.ConvertAll(entries, entry => entry.Output);

            PrintConsole(ConsoleColor.White, $"Input read term: {terms.Length}, time: {timer.Elapsed}");

            timer.Restart();
            var fst = new FSTBuilder <int>(outputType).FromList(terms, outputs);

            PrintConsole(ConsoleColor.White, $"FST constructed time: {timer.Elapsed}");

            // Round-trip check: each term must be found and must yield the output it was built with.
            timer.Restart();
            for (int i = 0; i < terms.Length; ++i)
            {
                if (!fst.TryMatch(terms[i], out var value) || value != outputs[i])
                {
                    throw new InvalidOperationException($"Bug at term {terms[i]}: {value} != {outputs[i]}");
                }
            }
            PrintConsole(ConsoleColor.White, $"FST verification time: {timer.Elapsed}");

            int size;

            timer.Restart();
            if (opts.Format == "Default")
            {
                size = WriteFstFile(opts.OutputFile, 'D', fst.GetBytes());
            }
            else if (opts.Format == "Compressed")
            {
                size = WriteFstFile(opts.OutputFile, 'C', fst.GetBytesCompressed());
            }
            else if (opts.Format == "Dot")
            {
                throw new NotImplementedException();
            }
            else
            {
                // Previously an unknown format fell through, wrote nothing, and still
                // reported a successful write of size 0. Fail loudly instead.
                throw new ArgumentException($"Unknown output format: {opts.Format}", nameof(opts));
            }
            PrintConsole(ConsoleColor.White, $"FST written to the output file: {opts.OutputFile}, size: {size}, time: {timer.Elapsed}");

            return(0);
        }

        /// <summary>
        /// Splits one "term->output" input line into its term and integer output.
        /// Only the first two "->"-separated fields are used; any further fields are ignored.
        /// </summary>
        /// <param name="line">The raw input line.</param>
        /// <param name="lineNumber">1-based line number, used only for error messages.</param>
        /// <exception cref="FormatException">The line has no "->" separator or its output is not an integer.</exception>
        private static (string Term, int Output) ParseInputLine(string line, int lineNumber)
        {
            var parts = line.Split("->");
            if (parts.Length < 2)
            {
                throw new FormatException($"Line {lineNumber}: expected 'term->output' but got '{line}'");
            }

            // Input is machine-readable data, so parse with the invariant culture
            // rather than whatever culture the process happens to run under.
            if (!int.TryParse(parts[1],
                              System.Globalization.NumberStyles.Integer,
                              System.Globalization.CultureInfo.InvariantCulture,
                              out var output))
            {
                throw new FormatException($"Line {lineNumber}: output '{parts[1]}' is not a valid integer");
            }

            return (parts[0], output);
        }

        /// <summary>
        /// Writes the 7-byte header "FST-01" + <paramref name="formatTag"/> followed by
        /// <paramref name="fstBytes"/> to <paramref name="path"/>.
        /// </summary>
        /// <returns>The total number of bytes written (header plus payload).</returns>
        private static int WriteFstFile(string path, char formatTag, byte[] fstBytes)
        {
            var header = new[]
            {
                (byte)'F', (byte)'S', (byte)'T', (byte)'-', (byte)'0', (byte)'1', (byte)formatTag
            };

            var data = new byte[header.Length + fstBytes.Length];
            Array.Copy(header, 0, data, 0, header.Length);
            Array.Copy(fstBytes, 0, data, header.Length, fstBytes.Length);
            File.WriteAllBytes(path, data);

            return data.Length;
        }