/// <summary>
        /// Sample: builds an int-keyed index, serializes it to an in-memory stream, deserializes it
        /// into a second index and prints how many items in the second index match "text".
        /// </summary>
        public static async Task RunAsync()
        {
            // Default-configured index populated with three sample documents
            var sourceIndex = new FullTextIndexBuilder<int>().Build();
            await sourceIndex.AddAsync(1, "This is some text associated with A: fizz");
            await sourceIndex.AddAsync(2, "Some buzz text for B");
            await sourceIndex.AddAsync(3, "Text associated with C is both fizz and buzz");

            var binarySerializer = new BinarySerializer<int>();
            using var buffer = new MemoryStream();

            // disposeStream: false is passed because the same buffer is read back below
            await binarySerializer.SerializeAsync(sourceIndex, buffer, disposeStream: false);

            // Rewind to the start of the written data before loading it into a fresh index
            buffer.Position = 0;
            var restoredIndex = new FullTextIndexBuilder<int>().Build();
            await binarySerializer.DeserializeAsync(restoredIndex, buffer, disposeStream: false);

            // All three sample documents contain "text", so the expected output is:
            // 3 items contain text in the new index
            var hitCount = restoredIndex.Search("text").Count();
            Console.WriteLine($"{hitCount} items contain text in the new index");
        }
        /// <summary>
        /// Sample: round-trips an index keyed by <c>CompositeKey</c> through a <c>BinarySerializer</c>
        /// that is given a <c>CompositeKeySerializer</c>, then prints the key of the single item
        /// matching the query "fizz &amp; buzz".
        /// </summary>
        public static async Task RunAsync()
        {
            // Index keyed by a custom key type rather than a built-in one
            var sourceIndex = new FullTextIndexBuilder<CompositeKey>().Build();
            await sourceIndex.AddAsync(new CompositeKey(1, 9), "This is some text associated with A: fizz");
            await sourceIndex.AddAsync(new CompositeKey(2, 9), "Some buzz text for B");
            await sourceIndex.AddAsync(new CompositeKey(3, 11), "Text associated with C is both fizz and buzz");

            // According to the original sample notes, creating the serializer WITHOUT a custom key
            // serializer for this key type fails with:
            //    No standard key serializer exists for type CompositeKey -
            //    please provide a custom implementation of IKeySerializer<> when serializing/deserializing.
            // Hence the CompositeKeySerializer is supplied explicitly.
            var keyedSerializer = new BinarySerializer<CompositeKey>(new CompositeKeySerializer());

            using var buffer = new MemoryStream();

            // disposeStream: false is passed because the same buffer is read back below
            await keyedSerializer.SerializeAsync(sourceIndex, buffer, disposeStream: false);

            // Rewind, then load the serialized data into a brand new index
            buffer.Position = 0;
            var restoredIndex = new FullTextIndexBuilder<CompositeKey>().Build();
            await keyedSerializer.DeserializeAsync(restoredIndex, buffer, disposeStream: false);

            // Single() requires exactly one match; only the third document mentions both words.
            // Expected output: Only (3, 11) contains Fizz & Buzz
            var roundTrippedKey = restoredIndex.Search("fizz & buzz").Single().Key;
            Console.WriteLine($"Only ({roundTrippedKey.UserId}, {roundTrippedKey.CompanyId}) contains Fizz & Buzz");
        }
Exemple #3
0
        /// <summary>
        /// Sample: indexes <c>Book</c> objects by their <c>BookId</c> across the "Title", "Authors"
        /// and "Synopsis" fields, then runs an unrestricted search and a search restricted to the
        /// title field, printing the matching keys.
        /// </summary>
        public static async Task RunAsync()
        {
            // The index key type is int because books are keyed by Book.BookId.
            // Stemming is enabled for Title and Synopsis only; Authors uses the default options.
            var builder = new FullTextIndexBuilder<int>()
                .WithObjectTokenization<Book>(
                    bookOptions => bookOptions
                        .WithKey(book => book.BookId)
                        .WithField("Title", book => book.Title, fieldOptions => fieldOptions.WithStemming())
                        .WithField("Authors", book => book.Authors)
                        .WithField("Synopsis", book => book.Synopsis, fieldOptions => fieldOptions.WithStemming()));
            var bookIndex = builder.Build();

            await bookIndex.AddRangeAsync(books);

            // Per the original sample, both books contain "first", so the output is expected to start
            // with "Matched items: 1, 2" followed by " with respective scores: " and the two scores.
            var anyFieldMatches = bookIndex.Search("first");
            var matchedKeys = string.Join(", ", anyFieldMatches.Select(hit => hit.Key));
            var matchedScores = string.Join(", ", anyFieldMatches.Select(hit => hit.Score));
            Console.WriteLine("Matched items: " + matchedKeys + " with respective scores: " + matchedScores);

            // Per the original sample, only the first book has "the" in its title - prints "Matched items: 1"
            var titleMatches = bookIndex.Search("title=the");
            Console.WriteLine("Matched items: " + string.Join(", ", titleMatches.Select(hit => hit.Key)));
        }
Exemple #4
0
 /// <summary>
 /// Sample: configures an int-keyed index over <c>Customer</c> objects with a "Name" field and a
 /// "Profile" field whose text is read from <c>ProfileHtml</c> using an <c>XmlTextExtractor</c>.
 /// The index is only built here; nothing is added to it and nothing is awaited.
 /// </summary>
 public static async Task RunAsync()
 {
     var customerIndex = new FullTextIndexBuilder<int>()
         .WithObjectTokenization<Customer>(
             customerOptions => customerOptions
                 .WithKey(customer => customer.Id)
                 .WithField("Name", customer => customer.Name)
                 // The profile is markup, so a dedicated text extractor is passed for this field only
                 .WithField("Profile", customer => customer.ProfileHtml, textExtractor: new XmlTextExtractor()))
         .Build();
 }
        public async Task ShouldDeserializeV2Index()
        {
            // Loads the serialized index stored in TestResources.v2Index into a freshly built index
            // and checks that two known terms can each be found exactly once.
            var serializer = new BinarySerializer<string>();
            var index = new FullTextIndexBuilder<string>().Build();

            using (var v2Stream = new MemoryStream(TestResources.v2Index))
            {
                await serializer.DeserializeAsync(index, v2Stream);
            }

            // One ASCII term and one CJK term
            index.Search("serialized").Should().HaveCount(1);
            index.Search("亜").Should().HaveCount(1);
        }
Exemple #6
0
        /// <summary>
        /// Sample: builds a string-keyed index whose default tokenization options enable stemming
        /// and XML content, then adds every (name, text) pair returned by
        /// <c>WikipediaDataLoader.Load</c> for <c>WikipediaSample</c>.
        /// </summary>
        public static async Task RunAsync()
        {
            var wikipediaIndex = new FullTextIndexBuilder<string>()
                .WithDefaultTokenizationOptions(options => options.WithStemming().XmlContent())
                .Build();

            var pages = WikipediaDataLoader.Load(typeof(WikipediaSample));

            // Each entry deconstructs into the key to index under and the text to index
            foreach (var (pageName, pageText) in pages)
            {
                await wikipediaIndex.AddAsync(pageName, pageText);
            }
        }
        public async Task SearchingTheIndex_ShouldNotUseTextExtractor()
        {
            // Verifies that the configured text extractor affects indexed text but not search text.
            var builder = new FullTextIndexBuilder<int>()
                .WithIntraNodeTextSupportedAfterIndexDepth(0)
                .WithTextExtractor<ReversingTextExtractor>();
            var index = builder.Build();

            await index.AddAsync(1, "Hello");

            // The extractor is expected to have reversed the text on the way in, whereas the query
            // text is used as typed: the original spelling finds nothing, the reversed one finds the item.
            index.Search("Hello").Should().HaveCount(0);
            index.Search("olleh").Should().HaveCount(1);
        }
Exemple #8
0
        /// <summary>
        /// Sample: indexes three words starting with "Ca" and then walks the index character by
        /// character with an index navigator, printing match state, the indexed tokens below the
        /// current position, and the result of further navigation attempts.
        /// </summary>
        public static async Task RunAsync()
        {
            // Default-configured index holding three sample words
            var index = new FullTextIndexBuilder<string>().Build();
            await index.AddAsync("Item1", "Catastrophe");
            await index.AddAsync("Item2", "Casualty");
            await index.AddAsync("Item3", "Cat");

            // To programmatically search the index, create a navigator from the index.
            // It is disposed when this method exits.
            using var cursor = index.CreateNavigator();

            // Step through 'C' then 'A' (characters are given in their *index normalized* form)
            cursor.Process("CA".AsSpan());

            // Nothing indexed is exactly "ca", but all three words start with it.
            // Expected: Exact matches: 0 Exact and child matches: 3
            WriteMatchState(cursor);

            // 'T' continues into Cat and Catastrophe, but not Casualty
            cursor.Process('T');

            // Expected: Exact matches: 1 Exact and child matches: 2
            WriteMatchState(cursor);

            // EnumerateIndexedTokens lists the words indexed under the current position,
            // in their normalized form. Expected:
            // CAT
            // CATASTROPHE
            foreach (var indexedToken in cursor.EnumerateIndexedTokens())
            {
                Console.WriteLine(indexedToken);
            }

            // Process reports whether navigation succeeded.
            // Expected: True
            var movedThroughA = cursor.Process('A');
            Console.WriteLine(movedThroughA);

            // Expected: False
            var movedThroughZoom = cursor.Process("ZOOOOM");
            Console.WriteLine(movedThroughZoom);
        }
Exemple #9
0
        /// <summary>
        /// Builds an int-keyed index with stemming enabled in its default tokenization and adds
        /// four fixed documents under keys 1 to 4.
        /// </summary>
        /// <returns>The populated index.</returns>
        protected static async Task<IFullTextIndex<int>> CreateTestIndexAsync()
        {
            var builder = new FullTextIndexBuilder<int>()
                .WithDefaultTokenization(options => options.WithStemming());
            var testIndex = builder.Build();

            // Keys 1 and 2 hold the same five words in opposite order
            await testIndex.AddAsync(1, "One two three four five");
            await testIndex.AddAsync(2, "Five four three two one");
            await testIndex.AddAsync(3, "One Nine six");

            // A longer, sentence-like document
            await testIndex.AddAsync(4, "During a career spanning more than 20 years, Porcupine Tree earned critical acclaim from critics and fellow musicians, developed a cult following, and became an influence for new artists");

            return testIndex;
        }
        /// <summary>
        /// Indexes <paramref name="text"/> under the key "A", serializes that index to an in-memory
        /// stream and deserializes the stream into a second, freshly built index.
        /// </summary>
        /// <param name="text">The text to index before the round trip.</param>
        /// <returns>The index that was populated by deserialization.</returns>
        private static async Task<FullTextIndex<string>> SearializeAndDeserializeIndexWithText(string text)
        {
            // The using declaration guarantees the stream is released even if indexing or
            // (de)serialization throws. Disposing a MemoryStream more than once is harmless, so this
            // is safe even if DeserializeAsync disposes the stream itself.
            using var stream = new MemoryStream();
            var serializer = new BinarySerializer<string>();
            var index = new FullTextIndexBuilder<string>().Build();
            await index.AddAsync("A", text);

            // false: the stream must stay open because it is read back below
            await serializer.SerializeAsync(index, stream, false);

            // Rewind so deserialization starts from the beginning of the written data
            stream.Position = 0;

            var index2 = new FullTextIndexBuilder<string>().Build();
            await serializer.DeserializeAsync(index2, stream);

            return index2;
        }
        /// <summary>
        /// Builds a default int-keyed index containing the name of every public static property of
        /// <c>Color</c>, keyed by the property's position in the reflected array, added as a single
        /// batch change.
        /// </summary>
        /// <returns>The populated index.</returns>
        private static async Task<FullTextIndex<int>> CreateIndexAsync()
        {
            var staticColorProperties = typeof(Color).GetProperties(BindingFlags.Static | BindingFlags.Public);
            var index = new FullTextIndexBuilder<int>().Build();

            // All additions happen between BeginBatchChange and CommitBatchChangeAsync
            index.BeginBatchChange();

            for (var key = 0; key < staticColorProperties.Length; key++)
            {
                await index.AddAsync(key, staticColorProperties[key].Name);
            }

            await index.CommitBatchChangeAsync();

            return index;
        }
        public async Task AddingItemsToIndex_ShouldUseProvidedTextExtractor()
        {
            // Verifies that text added to the index is passed through the supplied ITextExtractor:
            // the mock always yields "MOCKED", so that is what should end up in the index root.
            IEnumerable<DocumentTextFragment> mockedFragments = new[]
            {
                new DocumentTextFragment(0, "MOCKED".AsMemory())
            };

            var extractorMock = new Mock<ITextExtractor>();
            extractorMock.SetReturnsDefault<IEnumerable<DocumentTextFragment>>(mockedFragments);

            var builder = new FullTextIndexBuilder<int>()
                .WithIntraNodeTextSupportedAfterIndexDepth(0)
                .WithTextExtractor(extractorMock.Object);
            var index = builder.Build();

            // The text passed here is irrelevant - the mock replaces it
            await index.AddAsync(1, "Hello");

            var rootText = index.Root.IntraNodeText.ToString();
            rootText.Should().BeEquivalentTo("MOCKED");
        }
        /// <summary>
        /// Builds a string-keyed index that uses <c>XmlTextExtractor</c> and stemming, fills it with
        /// the (name, text) pairs returned by <c>WikipediaDataLoader.Load</c> for
        /// <c>FullTextIndexTests</c>, and adds one extra item containing emoji.
        /// </summary>
        /// <returns>The populated index.</returns>
        private async Task<FullTextIndex<string>> CreateWikipediaIndexAsync()
        {
            var builder = new FullTextIndexBuilder<string>()
                .WithTextExtractor<XmlTextExtractor>()
                .WithDefaultTokenization(options => options.WithStemming());
            var wikipediaIndex = builder.Build();

            var pages = WikipediaDataLoader.Load(typeof(FullTextIndexTests));

            // Each entry deconstructs into the key to index under and the text to index
            foreach (var (pageName, pageText) in pages)
            {
                await wikipediaIndex.AddAsync(pageName, pageText);
            }

            // Also cover surrogate pairs by indexing some emoji
            await wikipediaIndex.AddAsync("Emoji", "Emojis can cause problems 🤷‍♀️ 🤷🏾‍♂️");

            return wikipediaIndex;
        }
Exemple #14
0
        /// <summary>
        /// Builds an index over the given replacements in which each replacement is its own key and
        /// its "find" field holds the characters of <c>MatchWord</c> in reverse order, so the lookup
        /// is keyed from the last character of the search text backwards.
        /// </summary>
        /// <param name="replacements">The replacements to create the lookup for.</param>
        /// <returns>The lookup of replacements, keyed on the last character in the search text.</returns>
        private static IFullTextIndex<WordReplacement> CreateReplacementLookup(
            IEnumerable<WordReplacement> replacements)
        {
            // The "find" field is tokenized with punctuation splitting, case insensitivity and
            // accent insensitivity all switched off.
            var lookup = new FullTextIndexBuilder<WordReplacement>()
                .WithObjectTokenization<WordReplacement>(
                    tokenization => tokenization
                        .WithKey(replacement => replacement)
                        .WithField(
                            "find",
                            replacement => new string(replacement.MatchWord.Reverse().ToArray()),
                            fieldOptions => fieldOptions
                                .SplitOnPunctuation(false)
                                .CaseInsensitive(false)
                                .AccentInsensitive(false)))
                .Build();

            // Blocking here is considered safe: the add could only become genuinely asynchronous
            // if the index were extended with an extension that is truly async.
            var pendingAdd = lookup.AddRangeAsync(replacements);
            pendingAdd.GetAwaiter().GetResult();

            return lookup;
        }
        public async Task ShouldRoundTripIndexStructure()
        {
            // Serializes the Wikipedia index to a scratch file, deserializes it into a new index and
            // checks that items, counts, structure and search results all survive the round trip.
            var serializer = new BinarySerializer<string>();

            // Uniquely named scratch file; FileMode.CreateNew fails if the file already exists
            var fileName = Guid.NewGuid().ToString() + ".dat";

            try
            {
                using (var stream = File.Open(fileName, FileMode.CreateNew))
                {
                    var stopwatch = Stopwatch.StartNew();
                    var index = await CreateWikipediaIndexAsync();

                    // false: the stream must stay open because it is read back below
                    await serializer.SerializeAsync(index, stream, false);

                    this.output.WriteLine($"Serialized in {stopwatch.ElapsedMilliseconds}ms");

                    stream.Length.Should().BeGreaterThan(4);

                    var newIndex = new FullTextIndexBuilder<string>().Build();

                    // Rewind so deserialization starts from the beginning of the file
                    stream.Position = 0;

                    stopwatch.Restart();
                    await serializer.DeserializeAsync(newIndex, stream, false);

                    this.output.WriteLine($"Deserialized in {stopwatch.ElapsedMilliseconds}ms");

                    newIndex.Items.GetIndexedItems().Should().BeEquivalentTo(index.Items.GetIndexedItems());
                    newIndex.Count.Should().Be(index.Count);
                    newIndex.Root.ToString().Should().Be(index.Root.ToString());

                    var oldResults = index.Search("test").ToList();
                    var newResults = newIndex.Search("test").ToList();

                    oldResults.Should().NotBeEmpty();
                    newResults.Should().BeEquivalentTo(oldResults);

                    newIndex.Search("🤷‍♀️").Should().HaveCount(1);
                }
            }
            finally
            {
                // Clean up even when an assertion above throws, so failed runs do not leave scratch
                // files behind. The stream has already been disposed by the using block at this
                // point, and File.Delete does not throw when the file does not exist.
                File.Delete(fileName);
            }
        }
Exemple #16
0
        /// <summary>
        /// Sample: indexes three short documents and prints how many match a query requiring both
        /// "Fizz" and "Buzz", and how many match a query accepting either word.
        /// </summary>
        public static async Task RunAsync()
        {
            // Default-configured index populated with three sample documents
            var index = new FullTextIndexBuilder<string>().Build();
            await index.AddAsync("A", "This is some text associated with A: fizz");
            await index.AddAsync("B", "Some buzz text for B");
            await index.AddAsync("C", "Text associated with C is both fizz and buzz");

            // Two words separated by a space: items must contain both Fizz *and* Buzz.
            // Expected output: Items with both Fizz and Buzz: 1
            var matchingBoth = index.Search("Fizz Buzz").ToList();
            Console.WriteLine($"Items with both Fizz and Buzz: {matchingBoth.Count}");

            // Two words separated by '|': items may contain either Fizz *or* Buzz.
            // Expected output: Items with Fizz or Buzz: 3
            var matchingEither = index.Search("Fizz | Buzz").ToList();
            Console.WriteLine($"Items with Fizz or Buzz: {matchingEither.Count}");
        }
Exemple #17
0
 // Gives each test class instance a fresh int-keyed builder as the system under test.
 public FullTextIndexBuilderTests() => this.sut = new FullTextIndexBuilder<int>();