/// <summary>
/// Builds an int-valued FST from a small set of keys that share prefixes and checks it
/// with <c>Verify</c>: once as built, once after a <c>GetBytes</c>/<c>FromBytes</c> round trip,
/// and once after a <c>GetBytesCompressed</c>/<c>FromBytesCompressed</c> round trip.
/// </summary>
public void ConstructionWithIntOutputs()
{
    var inputs = new string[]
    {
        "a",
        "ab",
        "abilities",
        "ability",
    };
    var outputs = new int[]
    {
        4,
        3134,
        7488,
        1580,
    };

    // Freshly built automaton.
    var fst = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(inputs, outputs);
    Verify(fst, inputs, outputs);

    // Plain serialization round trip.
    var plainBytes = fst.GetBytes();
    var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
    Verify(fromPlain, inputs, outputs);

    // Compressed serialization round trip.
    var compressedBytes = fst.GetBytesCompressed();
    var fromCompressed = FST<int>.FromBytesCompressed(compressedBytes, FSTVarIntOutput.Instance);
    Verify(fromCompressed, inputs, outputs);
}
/// <summary>
/// Builds a string-valued FST from six phrases and checks it with <c>Verify</c>:
/// once as built, once after a <c>GetBytes</c>/<c>FromBytes</c> round trip, and once after a
/// <c>GetBytesCompressed</c>/<c>FromBytesCompressed</c> round trip.
/// </summary>
public void SimpleConstructionWithStringOutputs()
{
    var inputs = new string[]
    {
        "a banana",
        "a lemon",
        "a mandarine",
        "a mango",
        "an apple",
        "an orange",
    };
    var outputs = new string[]
    {
        "one",
        "two",
        "three",
        "minusone",
        "minustwo",
        "minuseight"
    };

    // Freshly built automaton.
    var fst = new FSTBuilder<string>(FSTStringOutput.Instance).FromList(inputs, outputs);
    Verify(fst, inputs, outputs);

    // Plain serialization round trip.
    var plainBytes = fst.GetBytes();
    var fromPlain = FST<string>.FromBytes(plainBytes, FSTStringOutput.Instance);
    Verify(fromPlain, inputs, outputs);

    // Compressed serialization round trip.
    var compressedBytes = fst.GetBytesCompressed();
    var fromCompressed = FST<string>.FromBytesCompressed(compressedBytes, FSTStringOutput.Instance);
    Verify(fromCompressed, inputs, outputs);
}
/// <summary>
/// Builds an int-valued FST (including a negative output) from six phrases and checks it
/// with <c>Verify</c>: once as built, once after a <c>GetBytes</c>/<c>FromBytes</c> round trip,
/// and once after a <c>GetBytesCompressed</c>/<c>FromBytesCompressed</c> round trip.
/// </summary>
public void SimpleConstructionWithIntOutputs()
{
    var inputs = new string[]
    {
        "a banana",
        "a lemon",
        "a mandarine",
        "a mango",
        "an apple",
        "an orange",
    };
    var outputs = new int[] { 1, 2, 3, -2, 15, 8 };

    // Freshly built automaton.
    var fst = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(inputs, outputs);
    Verify(fst, inputs, outputs);

    // Plain serialization round trip.
    var plainBytes = fst.GetBytes();
    var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
    Verify(fromPlain, inputs, outputs);

    // Compressed serialization round trip.
    var compressedBytes = fst.GetBytesCompressed();
    var fromCompressed = FST<int>.FromBytesCompressed(compressedBytes, FSTVarIntOutput.Instance);
    Verify(fromCompressed, inputs, outputs);
}
/// <summary>
/// Creates the dictionary over <paramref name="storage"/> and, when the storage already
/// holds data, loads the FST from it via <c>FromBytesCompressed</c>.
/// </summary>
/// <param name="storage">Backing storage; its whole content is read when its length is positive.</param>
/// <param name="maxTokenLength">Maximum token length, converted to a byte length through <paramref name="encoding"/>.</param>
/// <param name="encoding">Text encoding, kept for later use and used to compute the maximum encoded token length.</param>
public PersistentDictionaryFst(IPersistentStorage storage, int maxTokenLength, ITextEncoding encoding)
{
    this.maxTokenByteLength = encoding.GetMaxEncodedLength(maxTokenLength);
    this.storage = storage;
    this.encoding = encoding;

    // Nothing persisted yet: the fst field is not assigned by this constructor.
    if (storage.Length <= 0)
    {
        return;
    }

    // Read the entire storage into memory and decode it as a compressed FST.
    var compressed = new byte[storage.Length];
    storage.ReadAll(0, compressed, 0, compressed.Length);
    this.fst = FST<int>.FromBytesCompressed(compressed, FSTVarIntOutput.Instance);
}
/// <summary>
/// Loads an FST from <c>opts.InputFile</c>, enumerates the terms matched by a
/// <c>WildcardMatcher</c> built from <c>opts.Pattern</c>, and writes each term with its
/// value to the console, followed by a summary line.
/// </summary>
/// <param name="opts">Print options; this method reads <c>InputFile</c> and <c>Pattern</c>.</param>
/// <returns>Always <c>0</c>.</returns>
/// <exception cref="NotSupportedException">
/// The file is too short to contain the format marker, or the marker is neither 'D' nor 'C'.
/// </exception>
/// <exception cref="Exception">
/// A term produced by <c>Match</c> could not be found again with <c>TryMatch</c>.
/// </exception>
private static int DoPrint(PrintOptions opts)
{
    // Byte 6 selects the loader; the FST payload begins at byte 7.
    // Bytes 0..5 are not inspected by this method.
    const int FormatMarkerIndex = 6;
    const int PayloadOffset = 7;

    var timer = Stopwatch.StartNew();
    var bytes = File.ReadAllBytes(opts.InputFile);

    // Guard against truncated/empty files: without this check, indexing the marker byte
    // would fail with IndexOutOfRangeException instead of a meaningful error.
    if (bytes.Length < PayloadOffset)
    {
        throw new NotSupportedException("FST format is not supported or input is not correct");
    }

    var fstBytes = bytes.Skip(PayloadOffset).ToArray();

    FST<int> fst;
    switch ((char)bytes[FormatMarkerIndex])
    {
        case 'D':
            // 'D' payloads are loaded with FromBytes.
            fst = FST<int>.FromBytes(fstBytes, outputType);
            break;

        case 'C':
            // 'C' payloads are loaded with FromBytesCompressed.
            fst = FST<int>.FromBytesCompressed(fstBytes, outputType);
            break;

        default:
            throw new NotSupportedException("FST format is not supported or input is not correct");
    }

    PrintConsole(ConsoleColor.White, $"FST read from: {opts.InputFile}, time: {timer.Elapsed}");

    timer.Restart();
    var terms = 0;
    foreach (var term in fst.Match(new WildcardMatcher(opts.Pattern, 255)))
    {
        // Every enumerated term must also be resolvable by exact lookup.
        if (!fst.TryMatch(term, out int value))
        {
            throw new Exception("This is a bug");
        }

        ++terms;
        Console.WriteLine($"{term}->{value}");
    }

    PrintConsole(ConsoleColor.White, $"FST print terms: {terms}, time: {timer.Elapsed}");
    return 0;
}
/// <summary>
/// Builds an int-valued FST from eight longer, partially overlapping names and checks it
/// with <c>Verify</c>: once as built, once after a <c>GetBytes</c>/<c>FromBytes</c> round trip,
/// and once after a <c>GetBytesCompressed</c>/<c>FromBytesCompressed</c> round trip.
/// </summary>
public void AcceptanceTestWithIntegerOutput()
{
    var inputs = new string[]
    {
        "Albert Schweitzer Ziekenhuis. Locatie Amstelwijck Heliport",
        "Amsterdam Airfield",
        "Amsterdam Airport",
        "Amsterdam Airport Schiphol",
        "Amsterdam Heliport",
        "Chafei Amsei Airport",
        "New Amsterdam Airport",
        "Schwarzheide/Schipkau Airport"
    };
    var outputs = new int[]
    {
        43711,
        23465,
        41198,
        2513,
        43207,
        5873,
        41521,
        29065
    };

    // Freshly built automaton.
    var fst = new FSTBuilder<int>(FSTVarIntOutput.Instance).FromList(inputs, outputs);
    Verify(fst, inputs, outputs);

    // Plain serialization round trip.
    var plainBytes = fst.GetBytes();
    var fromPlain = FST<int>.FromBytes(plainBytes, FSTVarIntOutput.Instance);
    Verify(fromPlain, inputs, outputs);

    // Compressed serialization round trip.
    var compressedBytes = fst.GetBytesCompressed();
    var fromCompressed = FST<int>.FromBytesCompressed(compressedBytes, FSTVarIntOutput.Instance);
    Verify(fromCompressed, inputs, outputs);
}
/// <summary>
/// Builds an FST into <c>opts.OutputFile</c> from the (term, score) pairs yielded by
/// <c>ParseFromOptions</c>, then verifies the result twice: in memory (only when the
/// output is smaller than 64 MiB) and through <c>PersistentFST</c> over the file.
/// Timing information for each phase is written via <c>PrintConsole</c>.
/// </summary>
/// <param name="opts">Build options; this method reads <c>OutputFile</c> and <c>CacheSize</c> and passes the options to <c>ParseFromOptions</c>.</param>
/// <returns>Always <c>0</c>.</returns>
/// <exception cref="Exception">A term is missing from the built FST or maps to a different score.</exception>
private static int DoBuild(BuildOptions opts)
{
    // Outputs at or above this size skip the in-memory verification pass.
    const int InMemoryVerificationLimit = 64 * 1024 * 1024;

    var stopwatch = Stopwatch.StartNew();

    // Remove any previous output before building.
    if (File.Exists(opts.OutputFile))
    {
        File.Delete(opts.OutputFile);
    }

    // Phase 1: construction.
    stopwatch.Restart();
    var termCount = 0;
    using (var storage = new FileStorage(opts.OutputFile))
    using (var builder = new FSTBuilder<int>(outputType, opts.CacheSize, storage))
    {
        builder.Begin();
        foreach (var (term, score) in ParseFromOptions(opts))
        {
            builder.Add(term, score);
            termCount++;
        }
        builder.End();

        // Reported while the builder and storage are still open.
        PrintConsole(ConsoleColor.White, $"FST constructed time: {stopwatch.Elapsed}, terms: {termCount}, cache size: {opts.CacheSize}, Memory: {Process.GetCurrentProcess().WorkingSet64}, output size: {storage.Length}");
    }

    // Phase 2: in-memory verification, only for sufficiently small outputs.
    using (var storage = new FileStorage(opts.OutputFile))
    {
        if (storage.Length < InMemoryVerificationLimit)
        {
            stopwatch.Restart();

            var data = new byte[storage.Length];
            storage.ReadAll(0, data, 0, data.Length);
            var memoryFst = FST<int>.FromBytesCompressed(data, outputType);

            foreach (var (term, score) in ParseFromOptions(opts))
            {
                var found = memoryFst.TryMatch(term, out var value);
                if (!found || value != score)
                {
                    throw new Exception($"Bug at term {term}: {value} != {score}");
                }
            }

            PrintConsole(ConsoleColor.White, $"FST (memory) verification time: {stopwatch.Elapsed}");
        }
    }

    // Phase 3: verification through the file-backed FST.
    stopwatch.Restart();
    using (var storage = new FileStorage(opts.OutputFile))
    using (var fileFst = new PersistentFST<int>(outputType, storage))
    {
        foreach (var (term, score) in ParseFromOptions(opts))
        {
            var found = fileFst.TryMatch(term, out var value);
            if (!found || value != score)
            {
                throw new Exception($"Bug at term {term}: {value} != {score}");
            }
        }
    }

    PrintConsole(ConsoleColor.White, $"FST (file) verification time: {stopwatch.Elapsed}");
    return 0;
}