/// <summary>
/// Builds an FST that maps six short phrases to string outputs and passes it to
/// <c>Verify</c> three times: as built, after a <c>GetBytes</c>/<c>FromBytes</c>
/// round trip, and after a <c>GetBytesCompressed</c>/<c>FromBytesCompressed</c>
/// round trip.
/// </summary>
public void SimpleConstructionWithStringOutputs()
{
    string[] inputs =
    {
        "a banana",
        "a lemon",
        "a mandarine",
        "a mango",
        "an apple",
        "an orange",
    };
    string[] outputs = { "one", "two", "three", "minusone", "minustwo", "minuseight" };

    // Freshly built transducer.
    var built = new FSTBuilder<string>(FSTStringOutput.Instance).FromList(inputs, outputs);
    Verify(built, inputs, outputs);

    // Round trip through the plain byte representation.
    var plainBytes = built.GetBytes();
    var fromPlain = FST<string>.FromBytes(plainBytes, FSTStringOutput.Instance);
    Verify(fromPlain, inputs, outputs);

    // Round trip through the compressed byte representation.
    var packedBytes = built.GetBytesCompressed();
    var fromPacked = FST<string>.FromBytesCompressed(packedBytes, FSTStringOutput.Instance);
    Verify(fromPacked, inputs, outputs);
}
/// <summary>
/// Builds an FST that maps six short phrases to integer outputs (including a
/// negative value) and passes it to <c>Verify</c> as built, after a
/// <c>GetBytes</c>/<c>FromBytes</c> round trip, and after a
/// <c>GetBytesCompressed</c>/<c>FromBytesCompressed</c> round trip.
/// </summary>
public void SimpleConstructionWithIntOutputs()
{
    string[] inputs =
    {
        "a banana",
        "a lemon",
        "a mandarine",
        "a mango",
        "an apple",
        "an orange",
    };
    int[] outputs = { 1, 2, 3, -2, 15, 8 };

    // Freshly built transducer.
    var built = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(inputs, outputs);
    Verify(built, inputs, outputs);

    // Round trip through the plain byte representation.
    var plainBytes = built.GetBytes();
    var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
    Verify(fromPlain, inputs, outputs);

    // Round trip through the compressed byte representation.
    var packedBytes = built.GetBytesCompressed();
    var fromPacked = FST<int>.FromBytesCompressed(packedBytes, FSTVarIntOutput.Instance);
    Verify(fromPacked, inputs, outputs);
}
/// <summary>
/// Builds an FST from four terms in which each of the first three is a prefix of
/// the one after it ("a", "ab", "abilities", "ability") with integer outputs, then
/// passes the built instance and both deserialized copies (plain and compressed)
/// to <c>Verify</c>.
/// </summary>
public void ConstructionWithIntOutputs()
{
    string[] inputs =
    {
        "a",
        "ab",
        "abilities",
        "ability",
    };
    int[] outputs =
    {
        4,
        3134,
        7488,
        1580,
    };

    // Freshly built transducer.
    var built = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(inputs, outputs);
    Verify(built, inputs, outputs);

    // Round trip through the plain byte representation.
    var plainBytes = built.GetBytes();
    var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
    Verify(fromPlain, inputs, outputs);

    // Round trip through the compressed byte representation.
    var packedBytes = built.GetBytesCompressed();
    var fromPacked = FST<int>.FromBytesCompressed(packedBytes, FSTVarIntOutput.Instance);
    Verify(fromPacked, inputs, outputs);
}
/// <summary>
/// Builds an FST over eight airport/heliport names and checks that enumerating
/// <c>Match</c> with the wildcard pattern <c>*Schip*</c> yields exactly the two
/// expected terms, in the expected order, each resolving through <c>TryMatch</c>
/// to its expected output.
/// </summary>
public void WildcardMatchingTest()
{
    string[] inputs =
    {
        "Albert Schweitzer Ziekenhuis. Locatie Amstelwijck Heliport",
        "Amsterdam Airfield",
        "Amsterdam Airport",
        "Amsterdam Airport Schiphol",
        "Amsterdam Heliport",
        "Chafei Amsei Airport",
        "New Amsterdam Airport",
        "Schwarzheide/Schipkau Airport"
    };
    int[] outputs = { 43711, 23465, 41198, 2513, 43207, 5873, 41521, 29065 };

    var fst = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(inputs, outputs);
    Verify(fst, inputs, outputs);

    string[] expectedTerms = { "Amsterdam Airport Schiphol", "Schwarzheide/Schipkau Airport" };
    int[] expectedOutputs = { 2513, 29065 };

    // Sanity check on the fixture itself: the two expectation arrays must line up.
    Assert.Equal(expectedOutputs.Length, expectedTerms.Length);

    var seen = 0;
    var matcher = new WildcardMatcher("*Schip*", 255);
    foreach (var term in fst.Match(matcher))
    {
        Assert.Equal(expectedTerms[seen], term);

        var found = fst.TryMatch(term, out int value);
        Assert.True(found);
        Assert.Equal(expectedOutputs[seen], value);

        seen++;
    }

    // Every expected term must have been produced.
    Assert.Equal(expectedOutputs.Length, seen);
}
/// <summary>
/// When <c>input</c> is still set, builds an FST from the contents of
/// <c>input</c> and <c>output</c>, passes it to <c>Validate</c>, writes its
/// compressed bytes to <c>storage</c>, and then sets both fields to null.
/// Does nothing when <c>input</c> is already null.
/// </summary>
public void Dispose()
{
    if (input == null)
    {
        return;
    }

    // The builder is created before the collections are snapshotted.
    var builder = new FSTBuilder<int>(FSTVarIntOutput.Instance);
    var fst = builder.FromList(input.ToArray(), output.ToArray());
    Validate(fst); //TODO: Optional

    // Persist the compressed form; the leading 0 is presumably the target offset
    // in storage (confirm against WriteAll's signature).
    var compressed = fst.GetBytesCompressed();
    storage.WriteAll(0, compressed, 0, compressed.Length);

    input = null;
    output = null;
}
/// <summary>
/// Builds an FST over eight airport/heliport names with integer outputs and passes
/// it to <c>Verify</c> as built, after a <c>GetBytes</c>/<c>FromBytes</c> round
/// trip, and after a <c>GetBytesCompressed</c>/<c>FromBytesCompressed</c> round
/// trip.
/// </summary>
public void AcceptanceTestWithIntegerOutput()
{
    string[] inputs =
    {
        "Albert Schweitzer Ziekenhuis. Locatie Amstelwijck Heliport",
        "Amsterdam Airfield",
        "Amsterdam Airport",
        "Amsterdam Airport Schiphol",
        "Amsterdam Heliport",
        "Chafei Amsei Airport",
        "New Amsterdam Airport",
        "Schwarzheide/Schipkau Airport"
    };
    int[] outputs = { 43711, 23465, 41198, 2513, 43207, 5873, 41521, 29065 };

    // Freshly built transducer.
    var built = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(inputs, outputs);
    Verify(built, inputs, outputs);

    // Round trip through the plain byte representation.
    var plainBytes = built.GetBytes();
    var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
    Verify(fromPlain, inputs, outputs);

    // Round trip through the compressed byte representation.
    var packedBytes = built.GetBytesCompressed();
    var fromPacked = FST<int>.FromBytesCompressed(packedBytes, FSTVarIntOutput.Instance);
    Verify(fromPacked, inputs, outputs);
}
/// <summary>
/// Builds an FST into <c>opts.OutputFile</c> from the (term, score) pairs produced by
/// <c>ParseFromOptions</c>, then re-reads the file and checks every pair again:
/// once through an in-memory <c>FST&lt;int&gt;</c> (only when the file is smaller
/// than 64 MiB) and once through a <c>PersistentFST&lt;int&gt;</c> over the file.
/// </summary>
/// <param name="opts">Build options; this method reads <c>OutputFile</c> and
/// <c>CacheSize</c> and forwards the whole object to <c>ParseFromOptions</c>.</param>
/// <returns>Always 0 when the method completes.</returns>
/// <exception cref="Exception">A term is missing from the FST or maps to a value
/// different from its score.</exception>
private static int DoBuild(BuildOptions opts)
{
    var timer = Stopwatch.StartNew();

    // Start from a clean slate: remove any previous output file.
    if (File.Exists(opts.OutputFile))
    {
        File.Delete(opts.OutputFile);
    }

    // Restart so the reported construction time excludes the delete above.
    timer.Restart();
    var terms = 0;
    using (var outputFile = new FileStorage(opts.OutputFile))
    using (var fstBuilder = new FSTBuilder<int>(outputType, opts.CacheSize, outputFile))
    {
        fstBuilder.Begin();
        foreach (var (term, score) in ParseFromOptions(opts))
        {
            fstBuilder.Add(term, score);
            ++terms;
        }
        fstBuilder.End();

        var elapsed = timer.Elapsed;

        // Process is IDisposable; release it as soon as the counter has been read
        // instead of leaving the instance for the finalizer.
        long workingSet;
        using (var currentProcess = Process.GetCurrentProcess())
        {
            workingSet = currentProcess.WorkingSet64;
        }

        PrintConsole(ConsoleColor.White, $"FST constructed time: {elapsed}, terms: {terms}, cache size: {opts.CacheSize}, Memory: {workingSet}, output size: {outputFile.Length}");
    }

    // In-memory verification: only attempted for outputs below 64 MiB because the
    // whole file is loaded into a single byte array.
    using (var outputFile = new FileStorage(opts.OutputFile))
    {
        if (outputFile.Length < 64 * 1024 * 1024)
        {
            timer.Restart();
            var data = new byte[outputFile.Length];
            outputFile.ReadAll(0, data, 0, data.Length);
            var fst = FST<int>.FromBytesCompressed(data, outputType);
            foreach (var (term, score) in ParseFromOptions(opts))
            {
                if (!fst.TryMatch(term, out var value) || value != score)
                {
                    throw new Exception($"Bug at term {term}: {value} != {score}");
                }
            }
            PrintConsole(ConsoleColor.White, $"FST (memory) verification time: {timer.Elapsed}");
        }
    }

    // File-backed verification: always performed, regardless of file size.
    timer.Restart();
    using (var outputFile = new FileStorage(opts.OutputFile))
    using (var fst = new PersistentFST<int>(outputType, outputFile))
    {
        foreach (var (term, score) in ParseFromOptions(opts))
        {
            if (!fst.TryMatch(term, out var value) || value != score)
            {
                throw new Exception($"Bug at term {term}: {value} != {score}");
            }
        }
    }
    PrintConsole(ConsoleColor.White, $"FST (file) verification time: {timer.Elapsed}");

    return 0;
}
/// <summary>
/// Reads <c>term-&gt;output</c> lines from <c>opts.InputFile</c>, builds an in-memory
/// FST from them, checks that every term maps back to its output, and writes the
/// FST to <c>opts.OutputFile</c> prefixed with a 7-byte header: the ASCII signature
/// <c>FST-01</c> followed by <c>D</c> (format "Default") or <c>C</c> (format
/// "Compressed").
/// </summary>
/// <param name="opts">Build options; this method reads <c>InputFile</c>,
/// <c>OutputFile</c> and <c>Format</c>.</param>
/// <returns>Always 0 when the method completes.</returns>
/// <exception cref="Exception">A term is missing from the FST or maps to a value
/// different from its expected output.</exception>
/// <exception cref="NotImplementedException"><c>opts.Format</c> is "Dot".</exception>
private static int DoBuild(BuildOptions opts)
{
    // Prepends the "FST-01" signature and a one-character format tag to the payload.
    byte[] WithHeader(char formatTag, byte[] payload)
    {
        var header = new[]
        {
            (byte)'F', (byte)'S', (byte)'T', (byte)'-', (byte)'0', (byte)'1', (byte)formatTag,
        };
        var framed = new byte[header.Length + payload.Length];
        Array.Copy(header, 0, framed, 0, header.Length);
        Array.Copy(payload, 0, framed, header.Length, payload.Length);
        return framed;
    }

    var timer = Stopwatch.StartNew();

    // Lines are sorted by term using ordinal comparison before the FST is built.
    var input = File.ReadAllLines(opts.InputFile)
        .OrderBy(x => x.Split("->")[0], StringComparer.Ordinal)
        .ToArray();
    var terms = new string[input.Length];
    var outputs = new int[input.Length];
    for (int i = 0; i < input.Length; ++i)
    {
        var s = input[i].Split("->");
        terms[i] = s[0];
        // The input file is machine-readable data, so parse independently of the
        // current thread culture.
        outputs[i] = int.Parse(s[1], System.Globalization.CultureInfo.InvariantCulture);
    }
    PrintConsole(ConsoleColor.White, $"Input read term: {terms.Length}, time: {timer.Elapsed}");

    timer.Restart();
    var fst = new FSTBuilder<int>(outputType).FromList(terms, outputs);
    PrintConsole(ConsoleColor.White, $"FST constructed time: {timer.Elapsed}");

    // Self-check: every input term must resolve to exactly the output it was built with.
    timer.Restart();
    for (int i = 0; i < terms.Length; ++i)
    {
        if (!fst.TryMatch(terms[i], out var value) || value != outputs[i])
        {
            throw new Exception($"Bug at term {terms[i]}: {value} != {outputs[i]}");
        }
    }
    PrintConsole(ConsoleColor.White, $"FST verification time: {timer.Elapsed}");

    var size = 0;
    timer.Restart();
    if (opts.Format == "Default")
    {
        var data = WithHeader('D', fst.GetBytes());
        File.WriteAllBytes(opts.OutputFile, data);
        size = data.Length;
    }
    else if (opts.Format == "Compressed")
    {
        var data = WithHeader('C', fst.GetBytesCompressed());
        File.WriteAllBytes(opts.OutputFile, data);
        size = data.Length;
    }
    else if (opts.Format == "Dot")
    {
        throw new NotImplementedException();
    }
    // NOTE(review): any other Format value writes nothing, yet the message below is
    // still printed with size 0 — confirm whether that should be rejected instead.

    PrintConsole(ConsoleColor.White, $"FST written to the output file: {opts.OutputFile}, size: {size}, time: {timer.Elapsed}");
    return 0;
}