/// <summary>
/// Round-trip check for <c>ExternalTrie</c> backed by the file "test.trie":
/// strings decoded from random bytes are added through one trie instance,
/// then looked up through a second instance opened on the same file name.
/// Elapsed time of each phase is written to the console, and the file is
/// deleted when the method exits, whether it succeeds or throws.
/// </summary>
/// <exception cref="Exception">
/// Thrown when <c>GetStringIndex</c> returns a negative value for a string
/// that was added during the first phase.
/// </exception>
public void TestExternalTrie()
{
    const int attempts = 1000;

    ArrayList inserted = new ArrayList();
    byte[] buffer = new byte[128];
    Random generator = new Random();
    DateTime phaseStart;
    TimeSpan elapsed;

    try
    {
        // Phase 1: fill the trie with strings decoded from random bytes.
        using (ExternalTrie writer = new ExternalTrie("test.trie"))
        {
            phaseStart = DateTime.Now;
            int ignoredIndex;
            for (int n = 0; n < attempts; ++n)
            {
                generator.NextBytes(buffer);
                string candidate = Encoding.UTF8.GetString(buffer);
                if (candidate.Length == 0)
                {
                    continue;
                }

                inserted.Add(candidate);
                writer.AddString(candidate, out ignoredIndex);
            }
        }

        // The clock is read after the using block ends, so disposal time is included.
        elapsed = DateTime.Now - phaseStart;
        Console.WriteLine("Insertion of 1000 random strings into external trie took " + elapsed.ToString());

        // Phase 2: open a fresh instance on the same file and look every string up.
        using (ExternalTrie reader = new ExternalTrie("test.trie"))
        {
            phaseStart = DateTime.Now;
            foreach (string expected in inserted)
            {
                int position = reader.GetStringIndex(expected);
                if (position >= 0)
                {
                    continue;
                }

                // NOTE(review): the lookup is repeated before failing; presumably a
                // convenient spot to step into with a debugger - confirm before removing.
                reader.GetStringIndex(expected);
                throw new Exception("One of strings, earlier inserted into the trie, could not be found");
            }
        }

        elapsed = DateTime.Now - phaseStart;
        Console.WriteLine("Searching for 1000 strings in the trie took " + elapsed.ToString());
    }
    finally
    {
        // Always remove the scratch file, even when the test fails.
        File.Delete("test.trie");
    }
}
/// <summary>
/// Passes <paramref name="token"/> to <c>AddString</c> on the shared token trie
/// and returns the index reported through its <c>out</c> parameter.
/// The trie is created on first use from <c>OMEnv.TokenTreeFileName</c> and
/// <c>OMEnv.CachingStrategy</c>, with its node cache size set to 4095.
/// </summary>
/// <param name="token">Token text handed to the trie.</param>
/// <returns>The index produced by <c>AddString</c> for the token.</returns>
/// <exception cref="FormatException">
/// Thrown when the trie operation raises <see cref="ArgumentOutOfRangeException"/>
/// or <see cref="IOException"/>; the original exception is available as
/// <see cref="Exception.InnerException"/>.
/// </exception>
public static int GetTermId(string token)
{
    if (_tokenTrie == null)
    {
        _tokenTrie = new ExternalTrie(OMEnv.TokenTreeFileName, OMEnv.CachingStrategy);
        _tokenTrie.NodesCacheSize = 4095;
    }

    int index;
    try
    {
        _tokenTrie.AddString(token, out index);
    }
    catch (ArgumentOutOfRangeException e) // bad index, can't seek in file
    {
        // Keep the original exception as InnerException so its stack trace is not lost.
        throw new FormatException("Token trie is corrupted. " + e.Message, e);
    }
    catch (IOException e) // other IO problems
    {
        // Keep the original exception as InnerException so its stack trace is not lost.
        throw new FormatException("Can't operate token trie. " + e.Message, e);
    }

    return index;
}