/// <summary>
/// Exercises a sorter implementation: adds 100 random byte sequences (each shorter
/// than 256 bytes), checks that adding after enumerators have been obtained throws
/// <see cref="InvalidOperationException"/>, and checks that two enumerators obtained
/// from the same sorter return equal entries in the same order.
/// </summary>
/// <param name="sorter">The sorter under test.</param>
private void Check(IBytesRefSorter sorter)
{
    const int entryCount = 100;
    int added = 0;
    while (added < entryCount)
    {
        byte[] payload = new byte[Random().nextInt(256)];
        Random().NextBytes(payload);
        sorter.Add(new BytesRef(payload));
        added++;
    }

    // Two iterators over the same sorter; they are compared entry by entry below.
    BytesRefIterator left = sorter.GetEnumerator();
    BytesRefIterator right = sorter.GetEnumerator();

    // Sorter contract: no more additions once iteration has been requested.
    try
    {
        sorter.Add(new BytesRef(new byte[1]));
        fail("expected contract violation.");
    }
    catch (InvalidOperationException)
    {
        // Expected.
    }

    BytesRef fromLeft;
    BytesRef fromRight;
    while ((fromLeft = left.Next()) != null && (fromRight = right.Next()) != null)
    {
        assertEquals(fromLeft, fromRight);
    }

    // Both iterators must now be exhausted.
    assertNull(left.Next());
    assertNull(right.Next());
}
/// <summary>
/// Opens the index in <c>store</c> and checks that the "contents" field of a
/// <c>LuceneDictionary</c> yields exactly "Jerry" and then "Tom". A second, fresh
/// iterator over the same field is then counted to confirm it returns two entries.
/// </summary>
public void TestFieldContents_1()
{
    try
    {
        indexReader = DirectoryReader.Open(store);

        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.EntryIterator;

        // assertEquals (rather than assertTrue on an equality test) reports the
        // actual term on failure and matches the other tests of this dictionary.
        assertNotNull("First element doesn't exist.", spare = it.Next());
        assertEquals("First element isn't correct", "Jerry", spare.Utf8ToString());
        assertNotNull("Second element doesn't exist.", spare = it.Next());
        assertEquals("Second element isn't correct", "Tom", spare.Utf8ToString());
        assertNull("More elements than expected", it.Next());

        // Re-create the dictionary and count the entries down from the expected total.
        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.EntryIterator;

        int counter = 2;
        while (it.Next() != null)
        {
            counter--;
        }

        assertTrue("Number of words incorrect", counter == 0);
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}
/// <summary>
/// Builds the final automaton from the entries provided by <paramref name="sorter"/>.
/// An entry whose bytes compare equal to the most recently added entry is skipped.
/// </summary>
/// <param name="sorter">Source of the entries; presumably yields them in sorted order (verify against caller).</param>
/// <returns>The finished automaton, or <c>null</c> if the sorter yielded no entries at all.</returns>
private FST<object> BuildAutomaton(BytesRefSorter sorter)
{
    // The automaton carries no outputs; every input maps to the shared "no output" value.
    Outputs<object> outputs = NoOutputs.Singleton;
    object noOutput = outputs.NoOutput;
    Builder<object> fstBuilder = new Builder<object>(
        FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
        shareMaxTailLength, outputs, null, false,
        PackedInts.DEFAULT, true, 15);

    BytesRef lastAdded = new BytesRef();
    IntsRef intsScratch = new IntsRef();
    int seen = 0;

    BytesRefIterator entries = sorter.GetEnumerator();
    for (BytesRef current = entries.Next(); current != null; current = entries.Next())
    {
        seen++;
        if (lastAdded.CompareTo(current) == 0)
        {
            // Same bytes as the last added entry (or as the initial empty scratch): skip it.
            continue;
        }

        fstBuilder.Add(Util.Fst.Util.ToIntsRef(current, intsScratch), noOutput);
        lastAdded.CopyBytes(current);
    }

    if (seen == 0)
    {
        return null;
    }

    return fstBuilder.Finish();
}
/// <summary>
/// Commits an empty index, then checks that a <c>HighFrequencyDictionary</c> over a
/// field named "bogus" exposes an iterator with a null comparator and no entries.
/// </summary>
public void TestEmpty()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    writer.Commit();
    writer.Dispose();

    IndexReader ir = DirectoryReader.Open(dir);
    try
    {
        IDictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
        BytesRefIterator tf = dictionary.EntryIterator;
        assertNull(tf.Comparator);
        assertNull(tf.Next());
    }
    finally
    {
        // Release the reader before its directory; previously it was never disposed.
        ir.Dispose();
    }

    dir.Dispose();
}
/// <summary>
/// Checks that a <c>LuceneDictionary</c> over the field "nonexistent_field"
/// returns no entries from its iterator.
/// </summary>
public void TestFieldNonExistent()
{
    try
    {
        indexReader = DirectoryReader.Open(store);
        ld = new LuceneDictionary(indexReader, "nonexistent_field");
        it = ld.EntryIterator;

        // The very first call must already report exhaustion.
        spare = it.Next();
        assertNull("More elements than expected", spare);
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}
/// <summary>
/// Checks that a <c>LuceneDictionary</c> over the field "aaa" yields the single
/// term "foo" and nothing else.
/// </summary>
public void TestFieldAaa()
{
    try
    {
        indexReader = DirectoryReader.Open(store);

        ld = new LuceneDictionary(indexReader, "aaa");
        it = ld.EntryIterator;

        assertNotNull("First element doesn't exist.", spare = it.Next());
        // assertEquals reports the actual term on failure and matches the sibling tests.
        assertEquals("First element isn't correct", "foo", spare.Utf8ToString());
        assertNull("More elements than expected", it.Next());
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}
/// <summary>
/// Walks the "contents" field of a <c>LuceneDictionary</c> and checks that it
/// returns "Jerry", then "Tom", then signals exhaustion with <c>null</c>.
/// </summary>
public void TestFieldContents_2()
{
    try
    {
        indexReader = DirectoryReader.Open(store);
        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.EntryIterator;

        // Just iterate through the words, checking each one as it is read.
        string firstWord = it.Next().Utf8ToString();
        assertEquals("First element isn't correct", "Jerry", firstWord);

        string secondWord = it.Next().Utf8ToString();
        assertEquals("Second element isn't correct", "Tom", secondWord);

        assertNull("Nonexistent element is really null", it.Next());
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}
/// <summary>
/// Initializes a wrapper around <paramref name="wrapped"/>. The wrapper
/// specifies a weight value of <c>1</c> for all terms and nullifies
/// associated payloads.
/// </summary>
/// <param name="wrapped">The iterator to wrap; stored as given.</param>
public InputIteratorWrapper(BytesRefIterator wrapped)
{
    this.wrapped = wrapped;
}
/// <summary>
/// Initializes a wrapper around <paramref name="wrapped"/>. The wrapper
/// specifies a weight value of <c>1</c> for all terms.
/// </summary>
/// <param name="wrapped">The iterator to wrap; stored as given.</param>
public TermFreqIteratorWrapper(BytesRefIterator wrapped)
{
    this.wrapped = wrapped;
}
/// <summary>
/// Indexes the data from the given <see cref="Dictionary"/>.
/// </summary>
/// <remarks>
/// Words shorter than three characters are skipped, as are words for which
/// <c>SeekExact</c> succeeds on any segment's <c>F_WORD</c> terms in the current
/// spell index. After the writer is disposed, the searcher is swapped so that later
/// lookups see the newly added words.
/// </remarks>
/// <param name="dict"> Dictionary to index </param>
/// <param name="config"> <see cref="IndexWriterConfig"/> to use </param>
/// <param name="fullMerge"> whether or not the spellcheck index should be fully merged </param>
/// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public void IndexDictionary(Dictionary dict, IndexWriterConfig config, bool fullMerge)
{
    lock (modifyCurrentIndexLock)
    {
        EnsureOpen();
        Directory dir = this.spellIndex;
        using (var writer = new IndexWriter(dir, config))
        {
            IndexSearcher indexSearcher = ObtainSearcher();
            try
            {
                // Collect one TermsEnum per segment that has the word field, so words
                // already present can be detected. This now runs inside the try block so
                // the searcher is released even if enumerating the segments throws.
                IList<TermsEnum> termsEnums = new List<TermsEnum>();

                // Read through the searcher obtained above (the one released in the
                // finally block) rather than through the shared field.
                IndexReader reader = indexSearcher.IndexReader;
                if (reader.MaxDoc() > 0)
                {
                    foreach (AtomicReaderContext ctx in reader.Leaves())
                    {
                        Terms terms = ctx.Reader().Terms(F_WORD);
                        if (terms != null)
                        {
                            termsEnums.Add(terms.Iterator(null));
                        }
                    }
                }

                BytesRefIterator iter = dict.EntryIterator;
                BytesRef currentTerm;
                while ((currentTerm = iter.Next()) != null)
                {
                    string word = currentTerm.Utf8ToString();
                    int len = word.Length;
                    if (len < 3)
                    {
                        continue; // too short we bail but "too long" is fine...
                    }

                    // An empty termsEnums list simply means nothing is indexed yet.
                    bool alreadyIndexed = false;
                    foreach (TermsEnum te in termsEnums)
                    {
                        if (te.SeekExact(currentTerm))
                        {
                            alreadyIndexed = true;
                            break;
                        }
                    }

                    if (alreadyIndexed)
                    {
                        continue;
                    }

                    // ok index the word
                    var doc = CreateDocument(word, GetMin(len), GetMax(len));
                    writer.AddDocument(doc);
                }
            }
            finally
            {
                ReleaseSearcher(indexSearcher);
            }

            if (fullMerge)
            {
                writer.ForceMerge(1);
            }
        }

        // TODO: this isn't that great, maybe in the future SpellChecker should take
        // IWC in its ctor / keep its writer open?

        // also re-open the spell index to see our own changes when the next suggestion
        // is fetched:
        SwapSearcher(dir);
    }
}
/// <summary>
/// Initializes a wrapper around <paramref name="wrapped"/>. The wrapper
/// specifies a weight value of <c>1</c> for all terms and nullifies
/// associated payloads.
/// </summary>
/// <param name="wrapped">The iterator to wrap; stored as given.</param>
public InputIteratorWrapper(BytesRefIterator wrapped)
{
    this.wrapped = wrapped;
}
/// <summary>
/// Checks that a <c>LuceneDictionary</c> over the field "zzz" yields the single
/// term "bar" and nothing else.
/// </summary>
public void TestFieldZzz()
{
    try
    {
        indexReader = DirectoryReader.Open(store);
        ld = new LuceneDictionary(indexReader, "zzz");
        it = ld.EntryIterator;

        // Read the only expected entry, then confirm the iterator is exhausted.
        spare = it.Next();
        assertNotNull("First element doesn't exist.", spare);
        assertEquals("First element isn't correct", "bar", spare.Utf8ToString());

        assertNull("More elements than expected", it.Next());
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}