コード例 #1
0
        /// <summary>
        /// Collects the dictionary matches for <paramref name="word"/>: first for the word as a whole
        /// (retrying without a closing Geresh when the exact form is unknown), then for every split of
        /// the word into a known prefix followed by a dictionary entry that accepts that prefix.
        /// </summary>
        /// <param name="word">The word to look up. It is not validated here (see the TODO below).</param>
        /// <returns>
        /// A list created with <c>SortOrder.Desc</c> containing the tokens added through <c>AddUnique</c>.
        /// Whole-word tokens are built with 1.0f and prefixed tokens with 0.9f as their last constructor
        /// argument (presumably a score - confirm against <c>HebrewToken</c>). Empty when nothing matches.
        /// </returns>
        public IList <HebrewToken> Lemmatize(string word)
        {
            // TODO: Verify word to be non-empty and contain Hebrew characters?

            var ret = new RealSortedList <HebrewToken>(SortOrder.Desc);

            MorphData md = m_dict.Lookup(word);

            // No exact match: try omitting a closing Geresh. The last char is compared directly so the
            // check is ordinal; the culture-sensitive string.EndsWith(string) can disagree with the
            // single-char Substring that follows.
            if (md == null && word.Length > 0 && word[word.Length - 1] == '\'')
            {
                md = m_dict.Lookup(word.Substring(0, word.Length - 1));
            }

            if (md != null)
            {
                for (int i = 0; i < md.Lemmas.Length; i++)
                {
                    // The token is always built from the original word, even when the lookup
                    // succeeded only after dropping the Geresh.
                    ret.AddUnique(new HebrewToken(word, 0, md.DescFlags[i], md.Lemmas[i], 1.0f));
                }
            }

            byte prefLen = 0;

            // Make sure there are at least 2 letters left after the prefix (the words של, שלא for example)
            while (word.Length - prefLen >= 2)
            {
                prefLen++;

                int prefixMask = m_prefixes.Lookup(word.Substring(0, prefLen));
                if (prefixMask == 0) // no such prefix
                {
                    break;
                }

                md = m_dict.Lookup(word.Substring(prefLen));
                if (md != null && (md.Prefixes & prefixMask) > 0)
                {
                    for (int i = 0; i < md.Lemmas.Length; i++)
                    {
                        // Keep only the lemmas whose own flags allow this particular prefix.
                        if (((int)HSpell.LingInfo.dmask2ps(md.DescFlags[i]) & prefixMask) > 0)
                        {
                            ret.AddUnique(new HebrewToken(word, prefLen, md.DescFlags[i], md.Lemmas[i], 0.9f));
                        }
                    }
                }
            }

            return ret;
        }
コード例 #2
0
ファイル: MorphData.cs プロジェクト: srdee/HebMorph
        /// <summary>
        /// Compares this instance with another <see cref="MorphData"/> element by element.
        /// </summary>
        /// <param name="obj">The object to compare with; may be null or of another type.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="obj"/> is a <see cref="MorphData"/> whose <c>DescFlags</c> and
        /// <c>Lemmas</c> have the same lengths and equal elements at every index; otherwise <c>false</c>.
        /// </returns>
        public override bool Equals(object obj)
        {
            MorphData o = obj as MorphData;

            if (o == null)
            {
                return false;
            }

            // Both arrays are indexed in parallel below, so both lengths have to agree up front;
            // checking only DescFlags would let a shorter Lemmas array throw inside the loop.
            if (DescFlags.Length != o.DescFlags.Length || Lemmas.Length != o.Lemmas.Length)
            {
                return false;
            }

            // NOTE(review): other members of MorphData are not part of this comparison - confirm intended.
            for (int i = 0; i < DescFlags.Length; i++)
            {
                if (DescFlags[i] != o.DescFlags[i])
                {
                    return false;
                }

                // Static object.Equals tolerates a null entry on either side, whereas
                // Lemmas[i].Equals(...) would throw a NullReferenceException for a null lemma.
                if (!object.Equals(Lemmas[i], o.Lemmas[i]))
                {
                    return false;
                }
            }

            return true;
        }
コード例 #3
0
        /// <summary>
        /// Compares this instance with another <see cref="MorphData"/> by the contents of <c>Lemmas</c>.
        /// </summary>
        /// <param name="obj">The object to compare with; may be null or of another type.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="obj"/> is a <see cref="MorphData"/> holding the same number of
        /// lemma entries and each pair of entries at the same index is equal; otherwise <c>false</c>.
        /// </returns>
        public override bool Equals(object obj)
        {
            MorphData o = obj as MorphData;

            if (o == null)
            {
                return false;
            }

            if (Lemmas.Count != o.Lemmas.Count)
            {
                return false;
            }

            for (int i = 0; i < Lemmas.Count; i++)
            {
                // Entries are compared by value only. The previous extra `Lemmas[i] != o.Lemmas[i]` test
                // is a reference comparison unless the entry type overloads the operator, which made
                // equal-but-distinct entries compare as different and left the Equals call unreachable
                // in that case. Static object.Equals also tolerates null entries on either side.
                if (!object.Equals(Lemmas[i], o.Lemmas[i]))
                {
                    return false;
                }
            }

            return true;
        }
コード例 #4
0
ファイル: Lemmatizer.cs プロジェクト: rlebowitz/HebMorph
        /// <summary>
        /// Lazily yields the dictionary matches for <paramref name="word"/>: first for the word as a whole
        /// (retrying without a closing Geresh when the exact form is unknown), then for every split of
        /// the word into a known prefix followed by a dictionary entry that accepts that prefix.
        /// </summary>
        /// <remarks>
        /// This is an iterator method, so no lookup runs until the returned sequence is enumerated.
        /// </remarks>
        /// <param name="word">The word to look up. It is not validated here (see the TODO below).</param>
        /// <returns>
        /// Whole-word tokens typed <c>WordType.HEBREW</c> (built with 1.0f as the last constructor argument)
        /// followed by prefixed tokens typed <c>WordType.HEBREW_WITH_PREFIX</c> (built with 0.9f).
        /// The sequence is empty when nothing matches.
        /// </returns>
        public IEnumerable <HebrewToken> Lemmatize(string word)
        {
            // TODO: Verify word to be non-empty and contain Hebrew characters?

            MorphData md = m_dict.Lookup(word);

            // No exact match: try omitting a closing Geresh. The last char is compared directly so the
            // check is ordinal; the culture-sensitive string.EndsWith(string) can disagree with the
            // single-char Substring that follows.
            if (md == null && word.Length > 0 && word[word.Length - 1] == '\'')
            {
                md = m_dict.Lookup(word.Substring(0, word.Length - 1));
            }

            if (md != null)
            {
                foreach (var result in md.Lemmas)
                {
                    // The token is always built from the original word, even when the lookup
                    // succeeded only after dropping the Geresh.
                    yield return new HebrewToken(word, 0, (DMask)(byte)result.DescFlag, result.Lemma, 1.0f)
                    {
                        Type = WordType.HEBREW
                    };
                }
            }

            byte prefLen = 0;

            // Make sure there are at least 2 letters left after the prefix (the words של, שלא for example)
            while (word.Length - prefLen >= 2)
            {
                prefLen++;

                int prefixMask = m_prefixes.Lookup(word.Substring(0, prefLen));
                if (prefixMask == 0) // no such prefix
                {
                    break;
                }

                md = m_dict.Lookup(word.Substring(prefLen));
                if (md != null && (md.Prefixes & prefixMask) > 0)
                {
                    foreach (var result in md.Lemmas)
                    {
                        var descFlag = (DMask)(byte)result.DescFlag;

                        // Keep only the lemmas whose own flags allow this particular prefix.
                        if (((int)HSpell.LingInfo.dmask2ps(descFlag) & prefixMask) > 0)
                        {
                            yield return new HebrewToken(word, prefLen, descFlag, result.Lemma, 0.9f)
                            {
                                Type = WordType.HEBREW_WITH_PREFIX
                            };
                        }
                    }
                }
            }
        }