// Opens the file at indexPath, deserializes a DictionaryIndex<int> from it and
// writes to the console how many results a search for the hash of "the" yields.
static void PrintIndexStats()
{
    // The stream is disposed when the method exits.
    using var stream = File.OpenRead(indexPath);
    var dictionaryIndex = DictionaryIndex<int>.Deserialize(stream);

    var wordHash = TextHasher.CalculateHashCode("the".AsSpan());
    var hits = dictionaryIndex.Search(wordHash).Count();

    Console.WriteLine($"The: {hits}");
}
/// <summary>
/// Asserts that the list-based overload of <c>TextHasher.CalculateHashCode</c>
/// (called with the arguments <c>0</c> and <c>text.Length</c>) and the span-based
/// overload return equal hashes for the same input.
/// </summary>
/// <param name="text">Text to hash through both overloads.</param>
public void HashFunctionsMustBeIdentical(string text)
{
    var characters = text.ToList();
    var hashFromList = TextHasher.CalculateHashCode(characters, 0, text.Length);

    var hashFromSpan = TextHasher.CalculateHashCode(text.AsSpan());

    Assert.Equal(hashFromList, hashFromSpan);
}
// Deserializes the index stored at externalIndexPath via ExternalIndexSerializer<int>
// and writes to the console how many results a search for the hash of "the" yields.
static void PrintExternalIndexStats()
{
    var indexSerializer = new ExternalIndexSerializer<int>();

    // The deserialized index is disposable; it is released when the method exits.
    using var externalIndex = indexSerializer.Deserialize(externalIndexPath);

    var wordHash = TextHasher.CalculateHashCode("the".AsSpan());
    var hits = externalIndex.Search(wordHash).Count();

    Console.WriteLine($"The: {hits}");
}
/// <summary>
/// Runs <paramref name="text"/> through <c>WikitextCleaner.Clean</c> and
/// <c>StateMachineTokenizer.Tokenize</c> (with <c>lowerCase: true</c>), then maps every
/// item produced by <c>NaiveTokenizer.Tokenize</c> to a hash computed over the
/// tokenized text using the item's <c>From</c> and <c>To</c> values.
/// </summary>
/// <param name="text">Characters of the document to process.</param>
/// <returns>One hash code per item yielded by <c>NaiveTokenizer.Tokenize</c>.</returns>
private static IEnumerable<int> Process(IList<char> text)
{
    var withoutMarkup = WikitextCleaner.Clean(text);
    var normalized = StateMachineTokenizer.Tokenize(withoutMarkup, lowerCase: true);

    var tokenRanges = NaiveTokenizer.Tokenize(normalized);

    // The projection captures the normalized text; hashes are computed per item by Select.
    return tokenRanges.Select(
        range => TextHasher.CalculateHashCode(normalized, range.From, range.To));
}
/// <summary>
/// Loads the index through <c>LoadIndex</c>, logs how long the load took, and wraps
/// the index in a <c>BooleanSearchEngine&lt;int&gt;</c> whose word-hashing callback
/// lower-cases each word before hashing it.
/// </summary>
/// <returns>A search engine built on top of the loaded index.</returns>
private BooleanSearchEngine<int> LoadSearchEngine()
{
    // Only the LoadIndex call is timed.
    var stopwatch = Stopwatch.StartNew();
    var index = LoadIndex();
    stopwatch.Stop();

    Log($"Index loaded in {stopwatch.Elapsed:g}");

    // NOTE(review): ToLower() uses the current culture; confirm this matches how
    // the indexed text was lower-cased before switching to ToLowerInvariant().
    return new BooleanSearchEngine<int>(
        index,
        word => TextHasher.CalculateHashCode(word.ToLower().AsSpan()));
}
/// <summary>
/// Splits <paramref name="words"/> with the parameterless <c>string.Split()</c>
/// (white-space separators) and maps each resulting piece to its
/// <c>TextHasher.CalculateHashCode</c> value.
/// </summary>
/// <param name="words">White-space separated words.</param>
/// <returns>The hash code of every piece, in the order the pieces appear.</returns>
private static IEnumerable<int> CreateExpectedResult(string words)
{
    var pieces = words.Split();

    return pieces.Select(piece => TextHasher.CalculateHashCode(piece.AsSpan()));
}