Пример #1
0
        /// <summary>
        /// Collects the stem(s) of the supplied word.
        /// </summary>
        /// <param name="word"> Buffer holding the characters of the word to stem </param>
        /// <param name="length"> Number of characters of <paramref name="word"/> to use, counted from index 0 </param>
        /// <returns> <see cref="IList{CharsRef}"/> containing the stems found for the word </returns>
        public IList <CharsRef> Stem(char[] word, int length)
        {
            if (dictionary.needsInputCleaning)
            {
                // Pass the input through the dictionary's cleaning step, then carry on
                // with the content of `segment` copied into the reusable scratch buffer.
                scratchSegment.Length = 0;
                scratchSegment.Append(word, 0, length);
                string cleanedText = dictionary.CleanInput(scratchSegment.ToString(), segment);
                scratchBuffer = ArrayUtil.Grow(scratchBuffer, cleanedText.Length);
                length = segment.Length;
                segment.CopyTo(0, scratchBuffer, 0, length);
                word = scratchBuffer;
            }

            List<CharsRef> result = new List<CharsRef>();
            Int32sRef matches = dictionary.LookupWord(word, 0, length);

            // TODO: some forms should not be added, e.g. ONLYINCOMPOUND;
            // the mere existence of a form does not make it valid.
            if (matches != null)
            {
                // One stem entry is emitted per form reported by the dictionary lookup.
                int index = 0;
                while (index < matches.Length)
                {
                    result.Add(NewStem(word, length));
                    index++;
                }
            }

            // Append whatever the full Stem overload produces for the same input.
            result.AddRange(Stem(word, length, -1, -1, -1, 0, true, true, false, false));
            return result;
        }
Пример #2
0
        public virtual void TestSimpleDictionary()
        {
            // Loads the "simple" affix/dictionary pair and checks suffix, prefix and
            // word lookups, plus the number of flags decoded for two known words.
            using (System.IO.Stream affixInput = this.GetType().getResourceAsStream("simple.aff"))
            using (System.IO.Stream dictInput = this.GetType().getResourceAsStream("simple.dic"))
            {
                Dictionary dict = new Dictionary(affixInput, dictInput);

                char[] suffixKey = new char[] { 'e' };
                char[] prefixKey = new char[] { 's' };
                assertEquals(3, dict.LookupSuffix(suffixKey, 0, 1).Length);
                assertEquals(1, dict.LookupPrefix(prefixKey, 0, 1).Length);

                // First word: "olr"
                char[] firstWord = new char[] { 'o', 'l', 'r' };
                IntsRef wordOrds = dict.LookupWord(firstWord, 0, 3);
                assertNotNull(wordOrds);
                assertEquals(1, wordOrds.Length);

                BytesRef flagBytes = new BytesRef();
                dict.flagLookup.Get(wordOrds.Ints[0], flagBytes);
                char[] decoded = Dictionary.DecodeFlags(flagBytes);
                assertEquals(1, decoded.Length);

                // Second word: "lucen" (reuses the same BytesRef instance)
                char[] secondWord = new char[] { 'l', 'u', 'c', 'e', 'n' };
                wordOrds = dict.LookupWord(secondWord, 0, 5);
                assertNotNull(wordOrds);
                assertEquals(1, wordOrds.Length);
                dict.flagLookup.Get(wordOrds.Ints[0], flagBytes);
                decoded = Dictionary.DecodeFlags(flagBytes);
                assertEquals(1, decoded.Length);
            }
        }
Пример #3
0
        /// <summary>
        /// Builds the list of stems for <paramref name="word"/>: first the dictionary forms that
        /// pass the flag checks below, then the results of the full <c>Stem</c> overload.
        /// </summary>
        /// <param name="word"> Buffer holding the characters of the word </param>
        /// <param name="length"> Number of characters of <paramref name="word"/> to use, counted from index 0 </param>
        /// <param name="caseVariant"> When true (and the dictionary defines a keepcase flag),
        /// forms carrying that flag are skipped; the value is also forwarded to <c>Stem</c> </param>
        /// <returns> The collected stems </returns>
        private IList <CharsRef> DoStem(char[] word, int length, bool caseVariant)
        {
            JCG.List<CharsRef> result = new JCG.List<CharsRef>();
            Int32sRef matches = dictionary.LookupWord(word, 0, length);

            if (matches != null)
            {
                for (int pos = 0; pos < matches.Length; pos += formStep)
                {
                    // A value of -1 means the dictionary does not define the flag.
                    bool keepCaseActive = caseVariant && dictionary.keepcase != -1;
                    bool needAffixActive = dictionary.needaffix != -1;
                    bool onlyInCompoundActive = dictionary.onlyincompound != -1;

                    bool rejected = false;
                    if (keepCaseActive || needAffixActive || onlyInCompoundActive)
                    {
                        dictionary.flagLookup.Get(matches.Int32s[matches.Offset + pos], scratch);
                        char[] flags = Dictionary.DecodeFlags(scratch);

                        rejected =
                            // a case variant is being looked up, but this word does not allow it
                            (keepCaseActive && Dictionary.HasFlag(flags, (char)dictionary.keepcase))
                            // the form is a pseudostem that requires an affix
                            || (needAffixActive && Dictionary.HasFlag(flags, (char)dictionary.needaffix))
                            // the form is only valid inside a compound word
                            || (onlyInCompoundActive && Dictionary.HasFlag(flags, (char)dictionary.onlyincompound));
                    }

                    if (!rejected)
                    {
                        result.Add(NewStem(word, length, matches, pos));
                    }
                }
            }

            try
            {
                result.AddRange(Stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
            }
            catch (Exception bogus) when (bogus.IsIOException())
            {
                // I/O failures are rethrown wrapped via RuntimeException.Create.
                throw RuntimeException.Create(bogus);
            }

            return result;
        }
Пример #4
0
        public virtual void TestCompressedBeforeSetDictionary()
        {
            // Loads "compressed-before-set.aff" with "compressed.dic" and checks suffix,
            // prefix and word lookups, plus the number of flags decoded for "olr".
            using (System.IO.Stream affixInput = this.GetType().getResourceAsStream("compressed-before-set.aff"))
            using (System.IO.Stream dictInput = this.GetType().getResourceAsStream("compressed.dic"))
            {
                Dictionary dict = new Dictionary(affixInput, dictInput);

                char[] suffixKey = new char[] { 'e' };
                char[] prefixKey = new char[] { 's' };
                assertEquals(3, dict.LookupSuffix(suffixKey, 0, 1).Length);
                assertEquals(1, dict.LookupPrefix(prefixKey, 0, 1).Length);

                char[] lookupWord = new char[] { 'o', 'l', 'r' };
                IntsRef wordOrds = dict.LookupWord(lookupWord, 0, 3);

                BytesRef flagBytes = new BytesRef();
                dict.flagLookup.Get(wordOrds.Ints[0], flagBytes);
                char[] decoded = Dictionary.DecodeFlags(flagBytes);
                assertEquals(1, decoded.Length);
            }
        }