Exemple #1
0
        /// <summary>
        /// Adds 100 randomly filled byte arrays to <paramref name="sorter"/>, requests two
        /// iterators from it, checks that a further <c>Add</c> is rejected with
        /// <see cref="InvalidOperationException"/>, and finally verifies that both iterators
        /// return equal entries until they are exhausted.
        /// </summary>
        /// <param name="sorter">The sorter implementation under test.</param>
        private void Check(IBytesRefSorter sorter)
        {
            int remaining = 100;
            while (remaining > 0)
            {
                byte[] payload = new byte[Random().nextInt(256)];
                Random().NextBytes(payload);
                sorter.Add(new BytesRef(payload));
                remaining--;
            }

            // Two independent iterators over the same sorter; they must stay aligned.
            BytesRefIterator first = sorter.GetEnumerator();
            BytesRefIterator second = sorter.GetEnumerator();

            // Sorter contract: adding after the iterators were requested must throw.
            try
            {
                sorter.Add(new BytesRef(new byte[1]));
                fail("expected contract violation.");
            }
            catch (InvalidOperationException)
            {
                // Expected.
            }

            while (true)
            {
                BytesRef left = first.Next();
                if (left == null)
                {
                    // Mirrors short-circuit evaluation: the second iterator is not advanced here.
                    break;
                }

                BytesRef right = second.Next();
                if (right == null)
                {
                    break;
                }

                assertEquals(left, right);
            }

            // Both iterators must report exhaustion.
            assertNull(first.Next());
            assertNull(second.Next());
        }
Exemple #2
0
        // Verifies that the "contents" field yields the terms "Jerry" and "Tom", in that
        // order and nothing else, and that a second dictionary over the same reader
        // yields exactly two entries.
        public void TestFieldContents_1()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);

                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.EntryIterator;

                // assertEquals (as used by the sibling tests) reports expected vs. actual
                // on failure instead of a bare boolean.
                assertNotNull("First element doesn't exist.", spare = it.Next());
                assertEquals("First element isn't correct", "Jerry", spare.Utf8ToString());
                assertNotNull("Second element doesn't exist.", spare = it.Next());
                assertEquals("Second element isn't correct", "Tom", spare.Utf8ToString());
                assertNull("More elements than expected", it.Next());

                // Fresh dictionary/iterator: count the entries down from the expected total.
                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.EntryIterator;

                int counter = 2;
                while (it.Next() != null)
                {
                    counter--;
                }

                assertTrue("Number of words incorrect", counter == 0);
            }
            finally
            {
                if (indexReader != null)
                {
                    indexReader.Dispose();
                }
            }
        }
        /// <summary>
        /// Builds the final automaton from the entries returned by <paramref name="sorter"/>.
        /// </summary>
        /// <param name="sorter">Source of the entries; iterated once via <c>GetEnumerator()</c>.</param>
        /// <returns>
        /// The result of <c>Builder.Finish()</c>, or <c>null</c> when the sorter returned no entries.
        /// </returns>
        private FST <object> BuildAutomaton(BytesRefSorter sorter)
        {
            Outputs<object> outputs = NoOutputs.Singleton;
            object noOutput = outputs.NoOutput;
            Builder<object> fstBuilder = new Builder<object>(
                FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength,
                outputs, null, false, PackedInts.DEFAULT, true, 15);

            // Holds a copy of the entry added most recently (starts out as an empty BytesRef).
            BytesRef previous = new BytesRef();
            IntsRef intsScratch = new IntsRef();
            int seen = 0;
            BytesRefIterator entries = sorter.GetEnumerator();

            for (BytesRef entry = entries.Next(); entry != null; entry = entries.Next())
            {
                seen++;

                // Entries that compare equal to the previously added one are counted but not added.
                if (previous.CompareTo(entry) == 0)
                {
                    continue;
                }

                fstBuilder.Add(Util.Fst.Util.ToIntsRef(entry, intsScratch), noOutput);
                previous.CopyBytes(entry);
            }

            if (seen == 0)
            {
                return null;
            }

            return fstBuilder.Finish();
        }
Exemple #4
0
        // Verifies that a HighFrequencyDictionary built over an index that received only an
        // empty commit exposes an iterator with a null comparator and no entries.
        public void TestEmpty()
        {
            Directory dir = NewDirectory();
            try
            {
                IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
                writer.Commit();
                writer.Dispose();

                IndexReader ir = DirectoryReader.Open(dir);
                try
                {
                    IDictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
                    BytesRefIterator tf = dictionary.EntryIterator;

                    assertNull(tf.Comparator);
                    assertNull(tf.Next());
                }
                finally
                {
                    // Release the reader before the directory it was opened on.
                    ir.Dispose();
                }
            }
            finally
            {
                // Dispose the directory even when an assertion above fails.
                dir.Dispose();
            }
        }
Exemple #5
0
        // A dictionary over a field that is not in the index must yield no entries.
        public void TestFieldNonExistent()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);
                ld = new LuceneDictionary(indexReader, "nonexistent_field");
                it = ld.EntryIterator;

                // The very first call must already report exhaustion.
                spare = it.Next();
                assertNull("More elements than expected", spare);
            }
            finally
            {
                if (indexReader != null) { indexReader.Dispose(); }
            }
        }
Exemple #6
0
        // Verifies that the "aaa" field yields exactly one term, "foo".
        public void TestFieldAaa()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);

                ld = new LuceneDictionary(indexReader, "aaa");
                it = ld.EntryIterator;
                assertNotNull("First element doesn't exist.", spare = it.Next());
                // assertEquals (as used by the sibling tests) reports expected vs. actual
                // on failure instead of a bare boolean.
                assertEquals("First element isn't correct", "foo", spare.Utf8ToString());
                assertNull("More elements than expected", it.Next());
            }
            finally
            {
                if (indexReader != null)
                {
                    indexReader.Dispose();
                }
            }
        }
Exemple #7
0
        // Walks the "contents" field and expects "Jerry", then "Tom", then the end of the iterator.
        public void TestFieldContents_2()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);
                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.EntryIterator;

                // Each word is fetched immediately before it is checked, one at a time.
                string firstWord = it.Next().Utf8ToString();
                assertEquals("First element isn't correct", "Jerry", firstWord);

                string secondWord = it.Next().Utf8ToString();
                assertEquals("Second element isn't correct", "Tom", secondWord);

                assertNull("Nonexistent element is really null", it.Next());
            }
            finally
            {
                if (indexReader != null) { indexReader.Dispose(); }
            }
        }
Exemple #8
0
 /// <summary>
 /// Creates a new wrapper, wrapping the specified iterator,
 /// specifying a weight value of <c>1</c> for all terms
 /// and nullifying associated payloads.
 /// </summary>
 /// <param name="wrapped">The iterator to wrap; stored in the <c>wrapped</c> field.</param>
 public InputIteratorWrapper(BytesRefIterator wrapped)
 {
     this.wrapped = wrapped;
 }
Exemple #9
0
 /// <summary>
 /// Creates a new wrapper, wrapping the specified iterator and
 /// specifying a weight value of <c>1</c> for all terms.
 /// </summary>
 /// <param name="wrapped">The iterator to wrap; stored in the <c>wrapped</c> field.</param>
 public TermFreqIteratorWrapper(BytesRefIterator wrapped)
 {
     this.wrapped = wrapped;
 }
Exemple #10
0
        /// <summary>
        /// Indexes the data from the given <see cref="Dictionary"/>. Words shorter than three
        /// characters are skipped, as are words whose term is already found in the
        /// <c>F_WORD</c> terms of the current spell index. </summary>
        /// <param name="dict"> Dictionary to index </param>
        /// <param name="config"> <see cref="IndexWriterConfig"/> to use </param>
        /// <param name="fullMerge"> whether or not the spellcheck index should be fully merged </param>
        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
        public void IndexDictionary(Dictionary dict, IndexWriterConfig config, bool fullMerge)
        {
            lock (modifyCurrentIndexLock)
            {
                EnsureOpen();
                Directory dir = this.spellIndex;
                using (var writer = new IndexWriter(dir, config))
                {
                    IndexSearcher indexSearcher = ObtainSearcher();
                    try
                    {
                        // Collected inside the try block so that the searcher is released
                        // even if gathering the per-segment term enumerators throws.
                        IList<TermsEnum> termsEnums = new List<TermsEnum>();

                        // NOTE(review): this reads the 'searcher' field rather than the
                        // 'indexSearcher' obtained above -- presumably the same instance; confirm.
                        IndexReader reader = searcher.IndexReader;
                        if (reader.MaxDoc() > 0)
                        {
                            foreach (AtomicReaderContext ctx in reader.Leaves())
                            {
                                Terms terms = ctx.Reader().Terms(F_WORD);
                                if (terms != null)
                                {
                                    termsEnums.Add(terms.Iterator(null));
                                }
                            }
                        }

                        BytesRefIterator iter = dict.EntryIterator;
                        BytesRef currentTerm;

                        while ((currentTerm = iter.Next()) != null)
                        {
                            string word = currentTerm.Utf8ToString();
                            int len = word.Length;
                            if (len < 3)
                            {
                                continue; // too short we bail but "too long" is fine...
                            }

                            if (IsTermAlreadyIndexed(termsEnums, currentTerm))
                            {
                                continue;
                            }

                            // ok index the word
                            var doc = CreateDocument(word, GetMin(len), GetMax(len));
                            writer.AddDocument(doc);
                        }
                    }
                    finally
                    {
                        ReleaseSearcher(indexSearcher);
                    }

                    if (fullMerge)
                    {
                        writer.ForceMerge(1);
                    }
                }
                // TODO: this isn't that great, maybe in the future SpellChecker should take
                // IWC in its ctor / keep its writer open?

                // also re-open the spell index to see our own changes when the next suggestion
                // is fetched:
                SwapSearcher(dir);
            }
        }

        /// <summary>
        /// Returns <c>true</c> as soon as one of <paramref name="termsEnums"/> seeks exactly to
        /// <paramref name="term"/>; returns <c>false</c> when none does or the list is empty.
        /// </summary>
        private static bool IsTermAlreadyIndexed(IList<TermsEnum> termsEnums, BytesRef term)
        {
            foreach (TermsEnum te in termsEnums)
            {
                if (te.SeekExact(term))
                {
                    return true;
                }
            }

            return false;
        }
Exemple #11
0
 /// <summary>
 /// Creates a new wrapper, wrapping the specified iterator,
 /// specifying a weight value of <c>1</c> for all terms
 /// and nullifying associated payloads.
 /// </summary>
 /// <param name="wrapped">The iterator to wrap; stored in the <c>wrapped</c> field.</param>
 public InputIteratorWrapper(BytesRefIterator wrapped)
 {
     this.wrapped = wrapped;
 }
 /// <summary>
 /// Creates a new wrapper, wrapping the specified iterator and
 /// specifying a weight value of <c>1</c> for all terms.
 /// </summary>
 /// <param name="wrapped">The iterator to wrap; stored in the <c>wrapped</c> field.</param>
 public TermFreqIteratorWrapper(BytesRefIterator wrapped)
 {
     this.wrapped = wrapped;
 }
        // A dictionary over a field that is not in the index must yield no entries.
        public void TestFieldNonExistent()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);
                ld = new LuceneDictionary(indexReader, "nonexistent_field");
                it = ld.EntryIterator;

                // The very first call must already report exhaustion.
                spare = it.Next();
                assertNull("More elements than expected", spare);
            }
            finally
            {
                if (indexReader != null)
                {
                    indexReader.Dispose();
                }
            }
        }
        // Verifies that the "zzz" field yields exactly one term, "bar".
        public void TestFieldZzz()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);
                ld = new LuceneDictionary(indexReader, "zzz");
                it = ld.EntryIterator;

                // Fetch the first entry, then check presence and content separately.
                spare = it.Next();
                assertNotNull("First element doesn't exist.", spare);

                string firstWord = spare.Utf8ToString();
                assertEquals("First element isn't correct", "bar", firstWord);

                assertNull("More elements than expected", it.Next());
            }
            finally
            {
                if (indexReader != null)
                {
                    indexReader.Dispose();
                }
            }
        }
        // Walks the "contents" field and expects "Jerry", then "Tom", then the end of the iterator.
        public void TestFieldContents_2()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);
                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.EntryIterator;

                // Each word is fetched immediately before it is checked, one at a time.
                string firstWord = it.Next().Utf8ToString();
                assertEquals("First element isn't correct", "Jerry", firstWord);

                string secondWord = it.Next().Utf8ToString();
                assertEquals("Second element isn't correct", "Tom", secondWord);

                assertNull("Nonexistent element is really null", it.Next());
            }
            finally
            {
                if (indexReader != null)
                {
                    indexReader.Dispose();
                }
            }
        }
        // Verifies that the "contents" field yields the terms "Jerry" and "Tom", in that
        // order and nothing else, and that a second dictionary over the same reader
        // yields exactly two entries.
        public void TestFieldContents_1()
        {
            try
            {
                indexReader = DirectoryReader.Open(store);

                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.EntryIterator;

                // assertEquals (as used by the sibling tests) reports expected vs. actual
                // on failure instead of a bare boolean.
                assertNotNull("First element doesn't exist.", spare = it.Next());
                assertEquals("First element isn't correct", "Jerry", spare.Utf8ToString());
                assertNotNull("Second element doesn't exist.", spare = it.Next());
                assertEquals("Second element isn't correct", "Tom", spare.Utf8ToString());
                assertNull("More elements than expected", it.Next());

                // Fresh dictionary/iterator: count the entries down from the expected total.
                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.EntryIterator;

                int counter = 2;
                while (it.Next() != null)
                {
                    counter--;
                }

                assertTrue("Number of words incorrect", counter == 0);
            }
            finally
            {
                if (indexReader != null) { indexReader.Dispose(); }
            }
        }