/// <summary>
/// Looks <paramref name="word"/> up in the dictionary, first as a whole and then with each
/// recognized prefix stripped, and collects one <see cref="HebrewToken"/> per matching lemma.
/// </summary>
/// <param name="word">The word to lemmatize. It is not validated here (see the TODO below).</param>
/// <returns>
/// A <c>RealSortedList</c> created with <c>SortOrder.Desc</c> containing the tokens added through
/// <c>AddUnique</c>; it is empty when no lookup succeeded.
/// </returns>
public IList<HebrewToken> Lemmatize(string word)
{
    // TODO: Verify word to be non-empty and contain Hebrew characters?
    var tokens = new RealSortedList<HebrewToken>(SortOrder.Desc);

    // Whole-word lookup. Only when that misses and the word ends with a Geresh (')
    // do we retry with the trailing Geresh omitted.
    MorphData entry = m_dict.Lookup(word);
    if (entry == null && word.EndsWith("'"))
    {
        entry = m_dict.Lookup(word.Substring(0, word.Length - 1));
    }

    if (entry != null)
    {
        for (int i = 0; i < entry.Lemmas.Length; i++)
        {
            // Whole-word matches carry a prefix length of 0 and the value 1.0f.
            tokens.AddUnique(new HebrewToken(word, 0, entry.DescFlags[i], entry.Lemmas[i], 1.0f));
        }
    }

    // Grow the candidate prefix one character at a time, as long as at least
    // 2 letters remain after it (the words של, שלא for example).
    byte prefixLength = 0;
    while (word.Length - prefixLength >= 2)
    {
        prefixLength++;

        int mask = m_prefixes.Lookup(word.Substring(0, prefixLength));
        if (mask == 0)
        {
            // No such prefix - stop trying longer ones.
            break;
        }

        MorphData stem = m_dict.Lookup(word.Substring(prefixLength));
        if (stem != null && (stem.Prefixes & mask) > 0)
        {
            for (int i = 0; i < stem.Lemmas.Length; i++)
            {
                var flags = stem.DescFlags[i];

                // Keep only the lemmas whose own prefix specifier overlaps the prefix mask.
                if (((int)HSpell.LingInfo.dmask2ps(flags) & mask) > 0)
                {
                    // Prefixed matches are added with the lower value 0.9f.
                    tokens.AddUnique(new HebrewToken(word, prefixLength, flags, stem.Lemmas[i], 0.9f));
                }
            }
        }
    }

    return tokens;
}
/// <summary>
/// Tolerant counterpart of the plain lemmatization: every dictionary query goes through
/// <c>LookupTolerant</c> with <c>LookupTolerators.TolerateEmKryiaAll</c>, so a single query
/// may return several candidate words, each with its own score.
/// </summary>
/// <param name="word">The word to lemmatize. It is not validated here (see the TODO below).</param>
/// <returns>
/// A <c>RealSortedList</c> created with <c>SortOrder.Desc</c> containing the tokens added through
/// <c>AddUnique</c>; it is empty when no lookup produced a result.
/// </returns>
public IList<HebrewToken> LemmatizeTolerant(string word)
{
    // TODO: Verify word to be non-empty and contain Hebrew characters?
    RealSortedList<HebrewToken> tokens = new RealSortedList<HebrewToken>(SortOrder.Desc);

    // Pass 1: the whole word, no prefix assumed.
    List<DictRadix<MorphData>.LookupResult> matches =
        m_dict.LookupTolerant(word, LookupTolerators.TolerateEmKryiaAll);
    if (matches != null)
    {
        foreach (DictRadix<MorphData>.LookupResult match in matches)
        {
            for (int i = 0; i < match.Data.Lemmas.Length; i++)
            {
                // The token text is the word reported by the lookup, not the input word.
                tokens.AddUnique(new HebrewToken(
                    match.Word,
                    0,
                    match.Data.DescFlags[i],
                    match.Data.Lemmas[i],
                    match.Score));
            }
        }
    }

    // Pass 2: strip progressively longer prefixes, as long as at least
    // 2 letters remain after the prefix (the words של, שלא for example).
    byte prefixLength = 0;
    while (word.Length - prefixLength >= 2)
    {
        prefixLength++;

        string prefix = word.Substring(0, prefixLength);
        int mask = m_prefixes.Lookup(prefix);
        if (mask == 0)
        {
            // No such prefix - stop trying longer ones.
            break;
        }

        matches = m_dict.LookupTolerant(word.Substring(prefixLength), LookupTolerators.TolerateEmKryiaAll);
        if (matches != null)
        {
            foreach (DictRadix<MorphData>.LookupResult match in matches)
            {
                for (int i = 0; i < match.Data.Lemmas.Length; i++)
                {
                    var flags = match.Data.DescFlags[i];

                    // Keep only the lemmas whose own prefix specifier overlaps the prefix mask.
                    if (((int)HSpell.LingInfo.dmask2ps(flags) & mask) > 0)
                    {
                        // Token text is the stripped prefix followed by the word the lookup
                        // reported; the lookup score is scaled by 0.9f for prefixed matches.
                        tokens.AddUnique(new HebrewToken(
                            prefix + match.Word,
                            prefixLength,
                            flags,
                            match.Data.Lemmas[i],
                            match.Score * 0.9f));
                    }
                }
            }
        }
    }

    return tokens;
}
/// <summary>
/// Lemmatizes <paramref name="word"/> using tolerant dictionary lookups
/// (<c>LookupTolerators.TolerateEmKryiaAll</c>), trying the whole word first and then the
/// remainder after each recognized prefix.
/// </summary>
/// <param name="word">The word to lemmatize. No validation is performed (see the TODO below).</param>
/// <returns>
/// The tokens gathered via <c>AddUnique</c> into a <c>RealSortedList</c> constructed with
/// <c>SortOrder.Desc</c>; empty when nothing was found.
/// </returns>
public IList<HebrewToken> LemmatizeTolerant(string word)
{
    // TODO: Verify word to be non-empty and contain Hebrew characters?
    RealSortedList<HebrewToken> found = new RealSortedList<HebrewToken>(SortOrder.Desc);

    // Whole-word candidates: prefix length 0, score taken straight from the lookup.
    List<DictRadix<MorphData>.LookupResult> candidates =
        m_dict.LookupTolerant(word, LookupTolerators.TolerateEmKryiaAll);
    if (candidates != null)
    {
        foreach (DictRadix<MorphData>.LookupResult candidate in candidates)
        {
            int lemmaIndex = 0;
            while (lemmaIndex < candidate.Data.Lemmas.Length)
            {
                found.AddUnique(new HebrewToken(candidate.Word, 0, candidate.Data.DescFlags[lemmaIndex], candidate.Data.Lemmas[lemmaIndex], candidate.Score));
                lemmaIndex++;
            }
        }
    }

    // Prefixed candidates. The loop ends once fewer than 2 letters would remain
    // after the prefix (the words של, שלא for example) or the prefix is unknown.
    byte cut = 0;
    while (word.Length - cut >= 2)
    {
        cut++;

        string head = word.Substring(0, cut);
        int headMask = m_prefixes.Lookup(head);
        if (headMask == 0)
        {
            break; // no such prefix
        }

        candidates = m_dict.LookupTolerant(word.Substring(cut), LookupTolerators.TolerateEmKryiaAll);
        if (candidates == null)
        {
            continue;
        }

        foreach (DictRadix<MorphData>.LookupResult candidate in candidates)
        {
            for (int lemmaIndex = 0; lemmaIndex < candidate.Data.Lemmas.Length; lemmaIndex++)
            {
                // Skip lemmas whose prefix specifier shares no bit with the prefix mask.
                int allowed = (int)HSpell.LingInfo.dmask2ps(candidate.Data.DescFlags[lemmaIndex]) & headMask;
                if (allowed <= 0)
                {
                    continue;
                }

                // Rebuild the token text from the prefix plus the word the lookup reported,
                // and scale the lookup score by 0.9f.
                found.AddUnique(new HebrewToken(head + candidate.Word, cut, candidate.Data.DescFlags[lemmaIndex], candidate.Data.Lemmas[lemmaIndex], candidate.Score * 0.9f));
            }
        }
    }

    return found;
}
/// <summary>
/// Lemmatizes <paramref name="word"/> with exact dictionary lookups: the whole word (with a
/// fallback that omits a closing Geresh), followed by the remainder after each recognized prefix.
/// </summary>
/// <param name="word">The word to lemmatize. No validation is performed (see the TODO below).</param>
/// <returns>
/// The tokens gathered via <c>AddUnique</c> into a <c>RealSortedList</c> constructed with
/// <c>SortOrder.Desc</c>; empty when nothing was found.
/// </returns>
public IList<HebrewToken> Lemmatize(string word)
{
    // TODO: Verify word to be non-empty and contain Hebrew characters?
    var found = new RealSortedList<HebrewToken>(SortOrder.Desc);

    // Exact hit wins; otherwise, if the word ends with a Geresh ('), try omitting it.
    MorphData whole = m_dict.Lookup(word)
        ?? (word.EndsWith("'") ? m_dict.Lookup(word.Substring(0, word.Length - 1)) : null);

    if (whole != null)
    {
        int lemmaIndex = 0;
        while (lemmaIndex < whole.Lemmas.Length)
        {
            // The token keeps the original input word, even when the Geresh was dropped for the lookup.
            found.AddUnique(new HebrewToken(word, 0, whole.DescFlags[lemmaIndex], whole.Lemmas[lemmaIndex], 1.0f));
            lemmaIndex++;
        }
    }

    // Prefixed forms. The loop ends once fewer than 2 letters would remain
    // after the prefix (the words של, שלא for example) or the prefix is unknown.
    byte cut = 0;
    while (word.Length - cut >= 2)
    {
        cut++;

        int headMask = m_prefixes.Lookup(word.Substring(0, cut));
        if (headMask == 0)
        {
            break; // no such prefix
        }

        MorphData rest = m_dict.Lookup(word.Substring(cut));
        if (rest == null || (rest.Prefixes & headMask) <= 0)
        {
            // Remainder unknown, or the entry shares no bit with this prefix mask.
            continue;
        }

        for (int lemmaIndex = 0; lemmaIndex < rest.Lemmas.Length; lemmaIndex++)
        {
            // Skip lemmas whose prefix specifier shares no bit with the prefix mask.
            int allowed = (int)HSpell.LingInfo.dmask2ps(rest.DescFlags[lemmaIndex]) & headMask;
            if (allowed <= 0)
            {
                continue;
            }

            // Prefixed matches are added with the lower value 0.9f.
            found.AddUnique(new HebrewToken(word, cut, rest.DescFlags[lemmaIndex], rest.Lemmas[lemmaIndex], 0.9f));
        }
    }

    return found;
}