/// <summary>
/// Sample: round-trips an int-keyed index through <c>BinarySerializer</c> using an
/// in-memory stream, then searches the restored index.
/// </summary>
public static async Task RunAsync()
{
    // Build the source index with the default configuration and populate it
    var sourceIndex = new FullTextIndexBuilder<int>().Build();
    await sourceIndex.AddAsync(1, "This is some text associated with A: fizz");
    await sourceIndex.AddAsync(2, "Some buzz text for B");
    await sourceIndex.AddAsync(3, "Text associated with C is both fizz and buzz");

    // Write the index to a memory buffer; the buffer is kept open so it can be re-read
    using var buffer = new MemoryStream();
    var serializer = new BinarySerializer<int>();
    await serializer.SerializeAsync(sourceIndex, buffer, disposeStream: false);

    // Rewind the buffer and load its contents into a brand new index instance
    buffer.Position = 0;
    var restoredIndex = new FullTextIndexBuilder<int>().Build();
    await serializer.DeserializeAsync(restoredIndex, buffer, disposeStream: false);

    // Show that the restored index can be searched
    // Emits: 3 items contain text in the new index
    var matchCount = restoredIndex.Search("text").Count();
    Console.WriteLine($"{matchCount} items contain text in the new index");
}
/// <summary>
/// Sample: round-trips an index keyed by the custom <c>CompositeKey</c> type, supplying a
/// <c>CompositeKeySerializer</c> to the binary serializer.
/// </summary>
public static async Task RunAsync()
{
    // Build an index whose keys are CompositeKey instances and populate it
    var sourceIndex = new FullTextIndexBuilder<CompositeKey>().Build();
    await sourceIndex.AddAsync(new CompositeKey(1, 9), "This is some text associated with A: fizz");
    await sourceIndex.AddAsync(new CompositeKey(2, 9), "Some buzz text for B");
    await sourceIndex.AddAsync(new CompositeKey(3, 11), "Text associated with C is both fizz and buzz");

    // This would error with: No standard key serializer exists for type CompositeKey -
    // please provide a custom implementation of IKeySerializer<> when serializing/deserializing.
    // var serializer = new BinarySerializer<int>();
    var keySerializer = new CompositeKeySerializer();
    var serializer = new BinarySerializer<CompositeKey>(keySerializer);

    // Write the index to a memory buffer; the buffer is kept open so it can be re-read
    using var buffer = new MemoryStream();
    await serializer.SerializeAsync(sourceIndex, buffer, disposeStream: false);

    // Rewind the buffer and load its contents into a brand new index instance
    buffer.Position = 0;
    var restoredIndex = new FullTextIndexBuilder<CompositeKey>().Build();
    await serializer.DeserializeAsync(restoredIndex, buffer, disposeStream: false);

    // Show that both the contents and the composite keys have round-tripped
    // Emits: Only (3, 11) contains Fizz & Buzz
    var onlyMatch = restoredIndex.Search("fizz & buzz").Single();
    Console.WriteLine($"Only ({onlyMatch.Key.UserId}, {onlyMatch.Key.CompanyId}) contains Fizz & Buzz");
}
/// <summary>
/// Sample: indexes <c>Book</c> objects by their fields and runs an unrestricted search
/// followed by a search restricted to the "Title" field.
/// </summary>
public static async Task RunAsync()
{
    // Books are indexed by their BookId property, which is an int.
    // Title and Synopsis are stemmed; Authors uses the default token options.
    var builder = new FullTextIndexBuilder<int>()
        .WithObjectTokenization<Book>(
            bookOptions => bookOptions
                .WithKey(book => book.BookId)
                .WithField("Title", book => book.Title, fieldOptions => fieldOptions.WithStemming())
                .WithField("Authors", book => book.Authors)
                .WithField("Synopsis", book => book.Synopsis, fieldOptions => fieldOptions.WithStemming()));

    var bookIndex = builder.Build();
    await bookIndex.AddRangeAsync(books);

    // Both books contain "first" - prints "Matched items: 1, 2" followed by their scores
    var results = bookIndex.Search("first");
    var matchedKeys = string.Join(", ", results.Select(i => i.Key));
    var matchedScores = string.Join(", ", results.Select(i => i.Score));
    Console.WriteLine("Matched items: " + matchedKeys + " with respective scores: " + matchedScores);

    // Only first book contains "the" in the title - prints "Matched items: 1"
    results = bookIndex.Search("title=the");
    matchedKeys = string.Join(", ", results.Select(i => i.Key));
    Console.WriteLine("Matched items: " + matchedKeys);
}
/// <summary>
/// Sample: configures an index over <c>Customer</c> objects where the "Profile" field is
/// read through an <c>XmlTextExtractor</c>.
/// </summary>
public static async Task RunAsync()
{
    // NOTE(review): nothing is awaited in this method and the built index is not used
    // afterwards - presumably the sample is intentionally minimal; confirm.
    var index = new FullTextIndexBuilder<int>()
        .WithObjectTokenization<Customer>(
            customerOptions => customerOptions
                .WithKey(customer => customer.Id)
                .WithField("Name", customer => customer.Name)
                .WithField(
                    "Profile",
                    customer => customer.ProfileHtml,
                    textExtractor: new XmlTextExtractor()))
        .Build();
}
/// <summary>
/// Verifies that the index stored in the <c>TestResources.v2Index</c> resource can be
/// deserialized and then searched.
/// </summary>
public async Task ShouldDeserializeV2Index()
{
    var index = new FullTextIndexBuilder<string>().Build();
    var serializer = new BinarySerializer<string>();

    // Load the stored index bytes into the freshly built index
    using (var storedIndexStream = new MemoryStream(TestResources.v2Index))
    {
        await serializer.DeserializeAsync(index, storedIndexStream);
    }

    // Each search term is expected to match exactly one item
    var latinMatches = index.Search("serialized");
    latinMatches.Should().HaveCount(1);

    var kanjiMatches = index.Search("亜");
    kanjiMatches.Should().HaveCount(1);
}
/// <summary>
/// Sample: builds a string-keyed index with stemming and XML content options, then indexes
/// every (name, text) pair returned by <c>WikipediaDataLoader</c>.
/// </summary>
public static async Task RunAsync()
{
    var builder = new FullTextIndexBuilder<string>()
        .WithDefaultTokenizationOptions(options => options.WithStemming().XmlContent());

    var index = builder.Build();

    // Each loaded entry is indexed with its name as the key
    foreach (var (pageName, pageText) in WikipediaDataLoader.Load(typeof(WikipediaSample)))
    {
        await index.AddAsync(pageName, pageText);
    }
}
/// <summary>
/// Verifies that the configured text extractor is applied when indexing text but not when
/// searching: the original word finds nothing, while its reversed form finds the item.
/// </summary>
public async Task SearchingTheIndex_ShouldNotUseTextExtractor()
{
    var builder = new FullTextIndexBuilder<int>()
        .WithIntraNodeTextSupportedAfterIndexDepth(0)
        .WithTextExtractor<ReversingTextExtractor>();

    var index = builder.Build();
    await index.AddAsync(1, "Hello");

    // The text will have been reversed by the text extractor, but searching won't have that applied
    var forwardMatches = index.Search("Hello");
    forwardMatches.Should().HaveCount(0);

    var reversedMatches = index.Search("olleh");
    reversedMatches.Should().HaveCount(1);
}
/// <summary>
/// Sample: walks the index character by character with an index navigator, printing the
/// match state and the indexed tokens found under the current position.
/// </summary>
public static async Task RunAsync()
{
    // Build an index with the default configuration and add three words starting with "ca"
    var index = new FullTextIndexBuilder<string>().Build();
    await index.AddAsync("Item1", "Catastrophe");
    await index.AddAsync("Item2", "Casualty");
    await index.AddAsync("Item3", "Cat");

    // To programmatically search the index, create an index navigator instance
    // from the index snapshot. It is disposed when the method exits.
    using var navigator = index.CreateNavigator();

    // Navigate through the letters 'C' and 'A' (these will be the characters in their
    // *index normalized* form)
    navigator.Process("CA".AsSpan());

    // There will be no exact matches at the current position in the index, but 3 matches
    // when considering child matches, i.e. words starting with "ca"
    // Writes: Exact matches: 0 Exact and child matches: 3
    WriteMatchState(navigator);

    // Navigating through the 'T' of Catastrophe and Cat, but not Casualty
    navigator.Process('T');

    // Writes: Exact matches: 1 Exact and child matches: 2
    WriteMatchState(navigator);

    // Use EnumerateIndexedTokens to reverse-engineer the words that have been indexed
    // under the current location in the index, in their normalized form.
    // Writes:
    // CAT
    // CATASTROPHE
    foreach (var indexedToken in navigator.EnumerateIndexedTokens())
    {
        Console.WriteLine(indexedToken);
    }

    // The Process method returns true if navigation was successful, and false otherwise.
    // Console.WriteLine formats booleans as "True"/"False".
    // Writes: True
    var movedToA = navigator.Process('A');
    Console.WriteLine(movedToA);

    // Writes: False
    var movedToZoom = navigator.Process("ZOOOOM");
    Console.WriteLine(movedToZoom);
}
/// <summary>
/// Creates a stemming index containing four fixed sample texts keyed 1 to 4.
/// </summary>
/// <returns>The populated index.</returns>
protected static async Task<IFullTextIndex<int>> CreateTestIndexAsync()
{
    var testIndex = new FullTextIndexBuilder<int>()
        .WithDefaultTokenization(options => options.WithStemming())
        .Build();

    // Sample texts in key order: the text at position n is indexed with key n + 1
    var sampleTexts = new[]
    {
        "One two three four five",
        "Five four three two one",
        "One Nine six",
        "During a career spanning more than 20 years, Porcupine Tree earned critical acclaim from critics and fellow musicians, developed a cult following, and became an influence for new artists",
    };

    for (var position = 0; position < sampleTexts.Length; position++)
    {
        await testIndex.AddAsync(position + 1, sampleTexts[position]);
    }

    return testIndex;
}
/// <summary>
/// Indexes <paramref name="text"/> under the key "A", serializes that index to an
/// in-memory stream, and deserializes the stream into a second index.
/// </summary>
/// <param name="text">The text to index before serializing.</param>
/// <returns>The index populated by deserialization.</returns>
private static async Task<FullTextIndex<string>> SearializeAndDeserializeIndexWithText(string text)
{
    // Build and populate the index that will be written out
    var originalIndex = new FullTextIndexBuilder<string>().Build();
    await originalIndex.AddAsync("A", text);

    // Serialize without disposing the buffer so it can be rewound and read back
    var buffer = new MemoryStream();
    var serializer = new BinarySerializer<string>();
    await serializer.SerializeAsync(originalIndex, buffer, false);
    buffer.Position = 0;

    var restoredIndex = new FullTextIndexBuilder<string>().Build();
    await serializer.DeserializeAsync(restoredIndex, buffer);

    return restoredIndex;
}
/// <summary>
/// Creates an index containing the name of every public static property of
/// <c>Color</c>, keyed by a zero-based counter, added within a single batch change.
/// </summary>
/// <returns>The populated index.</returns>
private static async Task<FullTextIndex<int>> CreateIndexAsync()
{
    var colorIndex = new FullTextIndexBuilder<int>().Build();

    // All additions below are committed together by CommitBatchChangeAsync
    colorIndex.BeginBatchChange();

    var colorProperties = typeof(Color).GetProperties(BindingFlags.Static | BindingFlags.Public);
    for (var key = 0; key < colorProperties.Length; key++)
    {
        await colorIndex.AddAsync(key, colorProperties[key].Name);
    }

    await colorIndex.CommitBatchChangeAsync();

    return colorIndex;
}
/// <summary>
/// Verifies that text added to the index is passed through the supplied text extractor:
/// the mocked extractor always yields "MOCKED", which is what ends up at the index root.
/// </summary>
public async Task AddingItemsToIndex_ShouldUseProvidedTextExtractor()
{
    // Any call on the mock that returns document fragments yields this single fragment
    var mockedFragments = new[] { new DocumentTextFragment(0, "MOCKED".AsMemory()) };
    var textExtractor = new Mock<ITextExtractor>();
    textExtractor.SetReturnsDefault<IEnumerable<DocumentTextFragment>>(mockedFragments);

    var builder = new FullTextIndexBuilder<int>()
        .WithIntraNodeTextSupportedAfterIndexDepth(0)
        .WithTextExtractor(textExtractor.Object);

    var index = builder.Build();
    await index.AddAsync(1, "Hello");

    var rootText = index.Root.IntraNodeText.ToString();
    rootText.Should().BeEquivalentTo("MOCKED");
}
/// <summary>
/// Creates a stemming index that uses <c>XmlTextExtractor</c>, populated with the entries
/// returned by <c>WikipediaDataLoader</c> plus one extra item containing emoji.
/// </summary>
/// <returns>The populated index.</returns>
private async Task<FullTextIndex<string>> CreateWikipediaIndexAsync()
{
    var builder = new FullTextIndexBuilder<string>()
        .WithTextExtractor<XmlTextExtractor>()
        .WithDefaultTokenization(options => options.WithStemming());

    var wikipediaIndex = builder.Build();

    // Each loaded entry is indexed with its name as the key
    foreach (var (pageName, pageText) in WikipediaDataLoader.Load(typeof(FullTextIndexTests)))
    {
        await wikipediaIndex.AddAsync(pageName, pageText);
    }

    // For good measure, index some surrogate pairs
    await wikipediaIndex.AddAsync("Emoji", "Emojis can cause problems 🤷♀️ 🤷🏾♂️");

    return wikipediaIndex;
}
/// <summary>
/// Builds a lookup index over the given replacements. Each replacement is its own key and
/// is indexed under a single "find" field holding its match word with the characters
/// reversed, so the lookup is keyed on the last character in the search text.
/// </summary>
/// <param name="replacements">The replacements to create the lookup for.</param>
/// <returns>The lookup of replacements, keyed on the last character in the search text.</returns>
private static IFullTextIndex<WordReplacement> CreateReplacementLookup(
    IEnumerable<WordReplacement> replacements)
{
    // The reversed match word is indexed as-is: no punctuation splitting and no
    // case or accent insensitivity.
    var builder = new FullTextIndexBuilder<WordReplacement>()
        .WithObjectTokenization<WordReplacement>(
            itemOptions => itemOptions
                .WithKey(replacement => replacement)
                .WithField(
                    "find",
                    replacement => new string(replacement.MatchWord.Reverse().ToArray()),
                    tokenOptions => tokenOptions
                        .SplitOnPunctuation(false)
                        .CaseInsensitive(false)
                        .AccentInsensitive(false)));

    var lookup = builder.Build();

    // This is safe because the only time this could become actually async is if the index
    // is extended to use some extension that is truly async
    lookup.AddRangeAsync(replacements).GetAwaiter().GetResult();

    return lookup;
}
/// <summary>
/// Verifies that the Wikipedia test index survives a serialize/deserialize round trip
/// through a file: indexed items, count, root structure and search results must all match
/// between the original and the restored index.
/// </summary>
public async Task ShouldRoundTripIndexStructure()
{
    var serializer = new BinarySerializer<string>();
    var fileName = Guid.NewGuid().ToString() + ".dat";

    try
    {
        using (var stream = File.Open(fileName, FileMode.CreateNew))
        {
            var stopwatch = Stopwatch.StartNew();
            var index = await CreateWikipediaIndexAsync();

            // Keep the stream open after serializing so it can be rewound and read back
            await serializer.SerializeAsync(index, stream, false);

            this.output.WriteLine($"Serialized in {stopwatch.ElapsedMilliseconds}ms");

            stream.Length.Should().BeGreaterThan(4);

            var newIndex = new FullTextIndexBuilder<string>().Build();

            stream.Position = 0;

            stopwatch.Restart();
            await serializer.DeserializeAsync(newIndex, stream, false);

            this.output.WriteLine($"Deserialized in {stopwatch.ElapsedMilliseconds}ms");

            newIndex.Items.GetIndexedItems().Should().BeEquivalentTo(index.Items.GetIndexedItems());
            newIndex.Count.Should().Be(index.Count);
            newIndex.Root.ToString().Should().Be(index.Root.ToString());

            var oldResults = index.Search("test").ToList();
            var newResults = newIndex.Search("test").ToList();

            oldResults.Should().NotBeEmpty();
            newResults.Should().BeEquivalentTo(oldResults);

            newIndex.Search("🤷♀️").Should().HaveCount(1);
        }
    }
    finally
    {
        // Remove the temporary file even when an assertion or I/O error aborts the test.
        // The using block above has already closed the stream by the time this runs, and
        // File.Delete does not throw when the file does not exist.
        File.Delete(fileName);
    }
}
/// <summary>
/// Sample: indexes three short texts and prints how many match a two-word query and how
/// many match the same two words combined with the <c>|</c> operator.
/// </summary>
public static async Task RunAsync()
{
    // Build an index with the default configuration and populate it
    var index = new FullTextIndexBuilder<string>().Build();
    await index.AddAsync("A", "This is some text associated with A: fizz");
    await index.AddAsync("B", "Some buzz text for B");
    await index.AddAsync("C", "Text associated with C is both fizz and buzz");

    // Search for text containing both Fizz *and* Buzz
    // Outputs: Items with both Fizz and Buzz: 1
    var bothWords = index.Search("Fizz Buzz").ToList();
    Console.WriteLine($"Items with both Fizz and Buzz: {bothWords.Count}");

    // Search for text containing either Fizz *or* Buzz
    // Outputs: Items with Fizz or Buzz: 3
    var eitherWord = index.Search("Fizz | Buzz").ToList();
    Console.WriteLine($"Items with Fizz or Buzz: {eitherWord.Count}");
}
/// <summary>
/// Initializes the fixture with a new int-keyed builder as the system under test.
/// </summary>
public FullTextIndexBuilderTests() => this.sut = new FullTextIndexBuilder<int>();