/// <summary>
/// Looks up <paramref name="word"/> in the dictionary, first as a whole word (optionally without
/// a trailing Geresh) and then with each recognized leading prefix stripped, collecting a token
/// for every matching lemma.
/// </summary>
/// <param name="word">The word to look up.</param>
/// <returns>
/// The collected tokens, held in a <c>RealSortedList</c> created with <c>SortOrder.Desc</c>.
/// Whole-word matches are created with 1.0f as the last constructor argument and prefixed
/// matches with 0.9f (presumably a relevance score - confirm against HebrewToken).
/// </returns>
public IList<HebrewToken> Lemmatize(string word)
{
    // TODO: Verify word to be non-empty and contain Hebrew characters?
    var ret = new RealSortedList<HebrewToken>(SortOrder.Desc);

    MorphData md = m_dict.Lookup(word);
    if (md == null && word.EndsWith("'", System.StringComparison.Ordinal))
    {
        // Try omitting the closing Geresh. The comparison is ordinal so that the character
        // removed by Substring is exactly the one that was matched.
        md = m_dict.Lookup(word.Substring(0, word.Length - 1));
    }

    if (md != null)
    {
        for (int i = 0; i < md.Lemmas.Length; i++)
        {
            ret.AddUnique(new HebrewToken(word, 0, md.DescFlags[i], md.Lemmas[i], 1.0f));
        }
    }

    byte prefLen = 0;

    // Make sure there are at least 2 letters left after the prefix (the words של, שלא for example)
    while (word.Length - prefLen >= 2)
    {
        prefLen++;

        int prefixMask = m_prefixes.Lookup(word.Substring(0, prefLen));
        if (prefixMask == 0)
        {
            // No such prefix, so no longer prefix is tried either.
            break;
        }

        md = m_dict.Lookup(word.Substring(prefLen));
        if (md == null || (md.Prefixes & prefixMask) <= 0)
        {
            continue;
        }

        for (int i = 0; i < md.Lemmas.Length; i++)
        {
            // Keep only the lemmas whose own prefix specifier overlaps this prefix's mask.
            if (((int)HSpell.LingInfo.dmask2ps(md.DescFlags[i]) & prefixMask) > 0)
            {
                ret.AddUnique(new HebrewToken(word, prefLen, md.DescFlags[i], md.Lemmas[i], 0.9f));
            }
        }
    }

    return ret;
}
/// <summary>
/// Compares this instance with another <c>MorphData</c> entry by entry: both must have the same
/// number of description flags and lemmas, and each flag/lemma pair must match.
/// </summary>
/// <param name="obj">The object to compare with.</param>
/// <returns>
/// <c>true</c> if <paramref name="obj"/> is a <c>MorphData</c> whose <c>DescFlags</c> and
/// <c>Lemmas</c> match this instance's element by element; otherwise <c>false</c>.
/// </returns>
public override bool Equals(object obj)
{
    MorphData o = obj as MorphData;
    if (o == null)
    {
        return false;
    }

    if (ReferenceEquals(this, o))
    {
        return true;
    }

    // Both arrays are indexed in the loop below, so both lengths must agree up front;
    // otherwise a shorter array on either side would throw instead of comparing unequal.
    if (DescFlags.Length != o.DescFlags.Length || Lemmas.Length != o.Lemmas.Length)
    {
        return false;
    }

    for (int i = 0; i < DescFlags.Length; i++)
    {
        if (DescFlags[i] != o.DescFlags[i])
        {
            return false;
        }

        // Static object.Equals is null-safe: a null lemma entry no longer throws.
        if (!object.Equals(Lemmas[i], o.Lemmas[i]))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Compares this instance with another <c>MorphData</c> by comparing their <c>Lemmas</c>
/// collections element by element.
/// </summary>
/// <param name="obj">The object to compare with.</param>
/// <returns>
/// <c>true</c> if <paramref name="obj"/> is a <c>MorphData</c> with the same number of lemma
/// entries and every pair of entries at the same index is equal; otherwise <c>false</c>.
/// </returns>
public override bool Equals(object obj)
{
    MorphData o = obj as MorphData;
    if (o == null)
    {
        return false;
    }

    if (ReferenceEquals(this, o))
    {
        return true;
    }

    if (Lemmas.Count != o.Lemmas.Count)
    {
        return false;
    }

    for (int i = 0; i < Lemmas.Count; i++)
    {
        // Compare entries through Equals only. The previous extra "!=" test on the entries is
        // reference inequality for a class without an operator overload, which made
        // value-equal entries compare unequal. Static object.Equals also tolerates null entries.
        if (!object.Equals(Lemmas[i], o.Lemmas[i]))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Lazily yields a token for every dictionary lemma matching <paramref name="word"/>: first for
/// the whole word (optionally without a trailing Geresh), then for the remainder of the word
/// after each recognized leading prefix.
/// </summary>
/// <param name="word">The word to look up.</param>
/// <returns>
/// A sequence of tokens. Whole-word matches have <c>Type = WordType.HEBREW</c> and are created
/// with 1.0f as the last constructor argument; prefixed matches have
/// <c>Type = WordType.HEBREW_WITH_PREFIX</c> and are created with 0.9f (presumably a relevance
/// score - confirm against HebrewToken). As this is an iterator, no lookup runs until the
/// sequence is enumerated.
/// </returns>
public IEnumerable<HebrewToken> Lemmatize(string word)
{
    // TODO: Verify word to be non-empty and contain Hebrew characters?
    MorphData md = m_dict.Lookup(word);
    if (md == null && word.EndsWith("'", System.StringComparison.Ordinal))
    {
        // Try omitting the closing Geresh. The comparison is ordinal so that the character
        // removed by Substring is exactly the one that was matched.
        md = m_dict.Lookup(word.Substring(0, word.Length - 1));
    }

    if (md != null)
    {
        foreach (var result in md.Lemmas)
        {
            yield return new HebrewToken(word, 0, (DMask)(byte)result.DescFlag, result.Lemma, 1.0f)
            {
                Type = WordType.HEBREW
            };
        }
    }

    byte prefLen = 0;

    // Make sure there are at least 2 letters left after the prefix (the words של, שלא for example)
    while (word.Length - prefLen >= 2)
    {
        prefLen++;

        int prefixMask = m_prefixes.Lookup(word.Substring(0, prefLen));
        if (prefixMask == 0)
        {
            // No such prefix, so no longer prefix is tried either.
            break;
        }

        md = m_dict.Lookup(word.Substring(prefLen));
        if (md == null || (md.Prefixes & prefixMask) <= 0)
        {
            continue;
        }

        foreach (var result in md.Lemmas)
        {
            DMask descFlag = (DMask)(byte)result.DescFlag;

            // Keep only the lemmas whose own prefix specifier overlaps this prefix's mask.
            if (((int)HSpell.LingInfo.dmask2ps(descFlag) & prefixMask) > 0)
            {
                yield return new HebrewToken(word, prefLen, descFlag, result.Lemma, 0.9f)
                {
                    Type = WordType.HEBREW_WITH_PREFIX
                };
            }
        }
    }
}