/// <summary>
/// Builds a <c>Dictionary</c> from the <c>simple.aff</c> / <c>simple.dic</c> resources and checks
/// the suffix and prefix lookups as well as the flags decoded for the entries
/// <c>olr</c> and <c>lucen</c>.
/// </summary>
public virtual void TestSimpleDictionary()
{
    using (System.IO.Stream affixStream = this.GetType().getResourceAsStream("simple.aff"))
    using (System.IO.Stream dictStream = this.GetType().getResourceAsStream("simple.dic"))
    {
        Dictionary dictionary = new Dictionary(affixStream, dictStream);
        assertEquals(3, dictionary.LookupSuffix(new char[] { 'e' }, 0, 1).Length);
        assertEquals(1, dictionary.LookupPrefix(new char[] { 's' }, 0, 1).Length);

        Int32sRef ordList = dictionary.LookupWord(new char[] { 'o', 'l', 'r' }, 0, 3);
        assertNotNull(ordList);
        assertEquals(1, ordList.Length);

        BytesRef @ref = new BytesRef();
        // The valid entries of an Int32sRef start at Offset, so read the first ord from
        // there instead of assuming the slice begins at index 0.
        dictionary.flagLookup.Get(ordList.Int32s[ordList.Offset], @ref);
        char[] flags = Dictionary.DecodeFlags(@ref);
        assertEquals(1, flags.Length);

        ordList = dictionary.LookupWord(new char[] { 'l', 'u', 'c', 'e', 'n' }, 0, 5);
        assertNotNull(ordList);
        assertEquals(1, ordList.Length);

        // @ref is reused as the output buffer for the second flag lookup.
        dictionary.flagLookup.Get(ordList.Int32s[ordList.Offset], @ref);
        flags = Dictionary.DecodeFlags(@ref);
        assertEquals(1, flags.Length);
    }
}
/// <summary>
/// Find the stem(s) of the provided word.
/// </summary>
/// <param name="word"> Buffer holding the word to find the stems for </param>
/// <param name="length"> Number of chars of <paramref name="word"/> (starting at index 0) that make up the word </param>
/// <returns> List of stems for the word </returns>
public IList<CharsRef> Stem(char[] word, int length)
{
    if (dictionary.needsInputCleaning)
    {
        // Run the input through the dictionary's cleaning step, then continue with
        // the cleaned characters copied into the reusable scratch buffer.
        scratchSegment.Length = 0;
        scratchSegment.Append(word, 0, length);
        string cleanedText = dictionary.CleanInput(scratchSegment.ToString(), segment);
        scratchBuffer = ArrayUtil.Grow(scratchBuffer, cleanedText.Length);
        length = segment.Length;
        segment.CopyTo(0, scratchBuffer, 0, length);
        word = scratchBuffer;
    }

    var results = new List<CharsRef>();

    IntsRef forms = dictionary.LookupWord(word, 0, length);
    if (forms != null)
    {
        // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
        // just because it exists, does not make it valid...
        int added = 0;
        while (added < forms.Length)
        {
            // One stem per dictionary form found for the exact word.
            results.Add(NewStem(word, length));
            added++;
        }
    }

    // Append the stems produced by the multi-argument Stem overload.
    results.AddRange(Stem(word, length, -1, -1, -1, 0, true, true, false, false));
    return results;
}
/// <summary>
/// Collects the stems for a word: first the dictionary forms of the word itself that pass
/// the keepcase / needaffix / onlyincompound flag checks, then the stems returned by the
/// multi-argument <c>Stem</c> overload.
/// </summary>
/// <param name="word"> Buffer holding the word </param>
/// <param name="length"> Number of chars of <paramref name="word"/> to consider </param>
/// <param name="caseVariant"> Whether the word is a case variant; enables the keepcase check and is passed on to <c>Stem</c> </param>
/// <returns> The list of stems found </returns>
private IList<CharsRef> DoStem(char[] word, int length, bool caseVariant)
{
    JCG.List<CharsRef> result = new JCG.List<CharsRef>();

    Int32sRef forms = dictionary.LookupWord(word, 0, length);
    if (forms != null)
    {
        // A flag check is only active when the dictionary defines that flag (value != -1).
        bool checkKeepCase = caseVariant && dictionary.keepcase != -1;
        bool checkNeedAffix = dictionary.needaffix != -1;
        bool checkOnlyInCompound = dictionary.onlyincompound != -1;
        bool inspectFlags = checkKeepCase || checkNeedAffix || checkOnlyInCompound;

        for (int i = 0; i < forms.Length; i += formStep)
        {
            bool accepted = true;
            if (inspectFlags)
            {
                dictionary.flagLookup.Get(forms.Int32s[forms.Offset + i], scratch);
                char[] wordFlags = Dictionary.DecodeFlags(scratch);

                bool rejected =
                    // we are looking for a case variant, but this word does not allow it
                    (checkKeepCase && Dictionary.HasFlag(wordFlags, (char)dictionary.keepcase))
                    // we can't add this form, its a pseudostem requiring an affix
                    || (checkNeedAffix && Dictionary.HasFlag(wordFlags, (char)dictionary.needaffix))
                    // we can't add this form, it only belongs inside a compound word
                    || (checkOnlyInCompound && Dictionary.HasFlag(wordFlags, (char)dictionary.onlyincompound));

                accepted = !rejected;
            }

            if (accepted)
            {
                result.Add(NewStem(word, length, forms, i));
            }
        }
    }

    try
    {
        result.AddRange(Stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
    }
    catch (Exception e) when (e.IsIOException())
    {
        // Rethrown wrapped as a runtime exception.
        throw RuntimeException.Create(e);
    }

    return result;
}
/// <summary>
/// Builds a <c>Dictionary</c> from <c>compressed-empty-alias.aff</c> and <c>compressed.dic</c>
/// and checks the suffix and prefix lookups as well as the flags decoded for the entry <c>olr</c>.
/// </summary>
public virtual void TestCompressedEmptyAliasDictionary()
{
    using Stream affixStream = this.GetType().getResourceAsStream("compressed-empty-alias.aff");
    using Stream dictStream = this.GetType().getResourceAsStream("compressed.dic");

    Dictionary dictionary = new Dictionary(affixStream, dictStream);
    assertEquals(3, dictionary.LookupSuffix(new char[] { 'e' }, 0, 1).Length);
    assertEquals(1, dictionary.LookupPrefix(new char[] { 's' }, 0, 1).Length);

    Int32sRef ordList = dictionary.LookupWord(new char[] { 'o', 'l', 'r' }, 0, 3);
    BytesRef @ref = new BytesRef();
    // The valid entries of an Int32sRef start at Offset, so read the first ord from
    // there instead of assuming the slice begins at index 0.
    dictionary.flagLookup.Get(ordList.Int32s[ordList.Offset], @ref);
    char[] flags = Dictionary.DecodeFlags(@ref);
    assertEquals(1, flags.Length);
}
/// <summary>
/// Builds a <c>Dictionary</c> from <c>compressed-before-set.aff</c> and <c>compressed.dic</c>
/// and checks the suffix and prefix lookups as well as the flags decoded for the entry <c>olr</c>.
/// </summary>
public virtual void TestCompressedBeforeSetDictionary()
{
    using (System.IO.Stream affixStream = this.GetType().getResourceAsStream("compressed-before-set.aff"))
    using (System.IO.Stream dictStream = this.GetType().getResourceAsStream("compressed.dic"))
    {
        Dictionary dictionary = new Dictionary(affixStream, dictStream);
        assertEquals(3, dictionary.LookupSuffix(new char[] { 'e' }, 0, 1).Length);
        assertEquals(1, dictionary.LookupPrefix(new char[] { 's' }, 0, 1).Length);

        IntsRef ordList = dictionary.LookupWord(new char[] { 'o', 'l', 'r' }, 0, 3);
        BytesRef @ref = new BytesRef();
        // The valid entries of an IntsRef start at Offset, so read the first ord from
        // there instead of assuming the slice begins at index 0.
        dictionary.flagLookup.Get(ordList.Ints[ordList.Offset], @ref);
        char[] flags = Dictionary.DecodeFlags(@ref);
        assertEquals(1, flags.Length);
    }
}