예제 #1
0
        /// <summary>
        /// Looks up <paramref name="word"/> in the dictionary, first as a whole word and then
        /// with each recognized leading prefix removed, and collects a token for every match.
        /// </summary>
        /// <param name="word">The word to lemmatize. It is not validated here (see TODO below).</param>
        /// <returns>
        /// The unique tokens that were found, held in a list created with <c>SortOrder.Desc</c>;
        /// the list is empty when nothing matched.
        /// </returns>
        public IList<HebrewToken> Lemmatize(string word)
        {
            // TODO: Verify word to be non-empty and contain Hebrew characters?

            var ret = new RealSortedList<HebrewToken>(SortOrder.Desc);

            // Try the word as-is; if it is unknown and ends with a closing Geresh (apostrophe),
            // retry without it. The comparison is ordinal so that the character removed by
            // Substring below is guaranteed to be the apostrophe itself.
            MorphData md = m_dict.Lookup(word);
            if (md == null && word.EndsWith("'", System.StringComparison.Ordinal))
            {
                md = m_dict.Lookup(word.Substring(0, word.Length - 1));
            }

            if (md != null)
            {
                // Whole-word matches: prefix length 0, last argument 1.0f
                // (presumably a confidence score - confirm against HebrewToken)
                for (int i = 0; i < md.Lemmas.Length; i++)
                {
                    ret.AddUnique(new HebrewToken(word, 0, md.DescFlags[i], md.Lemmas[i], 1.0f));
                }
            }

            byte prefLen = 0;

            // Make sure there are at least 2 letters left after the prefix (the words של, שלא for example)
            while (word.Length - prefLen >= 2)
            {
                prefLen++;

                int prefixMask = m_prefixes.Lookup(word.Substring(0, prefLen));
                if (prefixMask == 0) // no such prefix, so longer prefixes are not tried either
                {
                    break;
                }

                md = m_dict.Lookup(word.Substring(prefLen));
                if (md != null && (md.Prefixes & prefixMask) > 0)
                {
                    for (int i = 0; i < md.Lemmas.Length; i++)
                    {
                        // Keep only the lemmas whose own flags are compatible with this prefix mask
                        if (((int)HSpell.LingInfo.dmask2ps(md.DescFlags[i]) & prefixMask) > 0)
                        {
                            ret.AddUnique(new HebrewToken(word, prefLen, md.DescFlags[i], md.Lemmas[i], 0.9f));
                        }
                    }
                }
            }

            return ret;
        }
예제 #2
0
        /// <summary>
        /// Tells whether <paramref name="str"/> is known to the prefix lookup.
        /// </summary>
        /// <param name="str">The candidate prefix string.</param>
        /// <returns>
        /// <c>true</c> when the prefix lookup returns a value greater than zero; otherwise <c>false</c>.
        /// </returns>
        public bool IsLegalPrefix(string str)
        {
            int lookupResult = m_prefixes.Lookup(str);
            return lookupResult > 0;
        }
예제 #3
0
        /// <summary>
        /// Test helper: adds <paramref name="key"/>/<paramref name="obj"/> to <paramref name="d"/>,
        /// keeps <paramref name="counter"/> in step with the number of distinct keys added, and
        /// asserts that the radix reports the same count.
        /// </summary>
        /// <param name="d">The radix under test.</param>
        /// <param name="key">The key to add.</param>
        /// <param name="obj">The value to store under <paramref name="key"/>.</param>
        /// <param name="counter">Running count of expected entries; incremented when the key is new.</param>
        static void AddAndIncrement<T>(DictRadix<T> d, string key, T obj, ref int counter)
        {
            // Only increment the counter if the key doesn't already exist. A key is treated
            // as missing when the lookup yields default(T) before the insertion.
            bool hasKey = !object.Equals(d.Lookup(key), default(T));
            if (!hasKey)
            {
                counter++;
            }

            d.AddNode(key, obj);

            Assert.Equal(counter, d.Count);

            // Only check the stored value if it was actually written: either the key is new,
            // or the radix allows overriding existing values
            if (d.AllowValueOverride || !hasKey)
            {
                // Expected value first, so that failure messages are labelled correctly
                Assert.Equal(obj, d.Lookup(key));
            }
        }
예제 #4
0
        /// <summary>
        /// Lazily yields a token for every dictionary match of <paramref name="word"/>: first the
        /// whole-word matches, then the matches obtained after removing each recognized leading prefix.
        /// </summary>
        /// <param name="word">The word to lemmatize. It is not validated here (see TODO below).</param>
        /// <returns>
        /// A deferred sequence of tokens; whole-word matches are typed <c>WordType.HEBREW</c> and
        /// prefixed matches <c>WordType.HEBREW_WITH_PREFIX</c>. The sequence is empty when nothing matched.
        /// </returns>
        public IEnumerable<HebrewToken> Lemmatize(string word)
        {
            // TODO: Verify word to be non-empty and contain Hebrew characters?

            // Try the word as-is; if it is unknown and ends with a closing Geresh (apostrophe),
            // retry without it. The comparison is ordinal so that the character removed by
            // Substring below is guaranteed to be the apostrophe itself.
            MorphData md = m_dict.Lookup(word);
            if (md == null && word.EndsWith("'", System.StringComparison.Ordinal))
            {
                md = m_dict.Lookup(word.Substring(0, word.Length - 1));
            }

            if (md != null)
            {
                // Whole-word matches: prefix length 0, last argument 1.0f
                // (presumably a confidence score - confirm against HebrewToken)
                foreach (var result in md.Lemmas)
                {
                    yield return new HebrewToken(word, 0, (DMask)(byte)result.DescFlag, result.Lemma, 1.0f)
                    {
                        Type = WordType.HEBREW
                    };
                }
            }

            byte prefLen = 0;

            // Make sure there are at least 2 letters left after the prefix (the words של, שלא for example)
            while (word.Length - prefLen >= 2)
            {
                prefLen++;

                int prefixMask = m_prefixes.Lookup(word.Substring(0, prefLen));
                if (prefixMask == 0) // no such prefix, so longer prefixes are not tried either
                {
                    break;
                }

                md = m_dict.Lookup(word.Substring(prefLen));
                if (md != null && (md.Prefixes & prefixMask) > 0)
                {
                    foreach (var result in md.Lemmas)
                    {
                        var descFlag = (DMask)(byte)result.DescFlag;

                        // Keep only the lemmas whose own flags are compatible with this prefix mask
                        if (((int)HSpell.LingInfo.dmask2ps(descFlag) & prefixMask) > 0)
                        {
                            yield return new HebrewToken(word, prefLen, descFlag, result.Lemma, 0.9f)
                            {
                                Type = WordType.HEBREW_WITH_PREFIX
                            };
                        }
                    }
                }
            }
        }