/// <summary>
/// Collects the dictionary readings of <paramref name="word"/>: first as a whole word
/// (optionally without a closing Geresh), then as a known prefix followed by a
/// dictionary word.
/// </summary>
/// <param name="word">The word to look up. It is passed to the dictionary as-is.</param>
/// <returns>
/// A <c>RealSortedList</c> created with <c>SortOrder.Desc</c> containing the tokens that
/// were added through <c>AddUnique</c>. Whole-word matches are created with 1.0f as the
/// last constructor argument and prefixed matches with 0.9f (presumably a relevance
/// score - confirm against <c>HebrewToken</c>). The list is empty when nothing matched.
/// </returns>
public IList<HebrewToken> Lemmatize(string word)
{
    // TODO: Verify word to be non-empty and contain Hebrew characters?
    var ret = new RealSortedList<HebrewToken>(SortOrder.Desc);

    MorphData md = m_dict.Lookup(word);
    if (md == null && word.Length > 0 && word[word.Length - 1] == '\'')
    {
        // Try omitting a closing Geresh. The last character is compared ordinally on
        // purpose: a culture-sensitive EndsWith may report a match even when the final
        // UTF-16 unit is not the apostrophe, and then the wrong character would be cut.
        md = m_dict.Lookup(word.Substring(0, word.Length - 1));
    }

    if (md != null)
    {
        // Lemmas and DescFlags are read as parallel arrays indexed by the same position.
        for (int result = 0; result < md.Lemmas.Length; result++)
        {
            ret.AddUnique(new HebrewToken(word, 0, md.DescFlags[result], md.Lemmas[result], 1.0f));
        }
    }

    byte prefLen = 0;

    // Keep extending the candidate prefix by one character while at least two characters
    // remain beyond the current prefix (the words של, שלא for example). The check runs
    // before the prefix grows, so the looked-up remainder is at least one character long.
    while (word.Length - prefLen >= 2)
    {
        int prefixMask = m_prefixes.Lookup(word.Substring(0, ++prefLen));
        if (prefixMask == 0) // no such prefix
        {
            break;
        }

        md = m_dict.Lookup(word.Substring(prefLen));
        if (md == null || (md.Prefixes & prefixMask) <= 0)
        {
            continue;
        }

        for (int result = 0; result < md.Lemmas.Length; result++)
        {
            // Keep only the readings whose own prefix specifier overlaps this prefix's mask.
            if (((int)HSpell.LingInfo.dmask2ps(md.DescFlags[result]) & prefixMask) > 0)
            {
                ret.AddUnique(new HebrewToken(word, prefLen, md.DescFlags[result], md.Lemmas[result], 0.9f));
            }
        }
    }

    return ret;
}
/// <summary>
/// Tells whether <paramref name="str"/> is a known prefix.
/// </summary>
/// <param name="str">The candidate prefix to look up in the prefix table.</param>
/// <returns>
/// <c>true</c> when the prefix lookup yields a value greater than zero; otherwise <c>false</c>.
/// </returns>
public bool IsLegalPrefix(string str)
{
    return m_prefixes.Lookup(str) > 0;
}
/// <summary>
/// Test helper: adds <paramref name="key"/> / <paramref name="obj"/> to
/// <paramref name="d"/>, keeps <paramref name="counter"/> in step with the number of
/// distinct keys added so far, and asserts that the radix agrees.
/// </summary>
/// <typeparam name="T">The value type stored in the radix.</typeparam>
/// <param name="d">The radix under test.</param>
/// <param name="key">The key to add.</param>
/// <param name="obj">The value to store under <paramref name="key"/>.</param>
/// <param name="counter">
/// Running count of expected entries; incremented when the key was not present before the add.
/// </param>
static void AddAndIncrement<T>(DictRadix<T> d, string key, T obj, ref int counter)
{
    // Only increment counter if the key doesn't already exist. A lookup that yields
    // default(T) is treated as "key absent".
    bool hasKey = !object.Equals(d.Lookup(key), default(T));
    if (!hasKey)
    {
        counter++;
    }

    d.AddNode(key, obj);
    Assert.Equal(counter, d.Count);

    // Only check insertion if there was one
    if (d.AllowValueOverride || !hasKey)
    {
        // Expected value goes first so that a failure message reports the two sides correctly.
        Assert.Equal(obj, d.Lookup(key));
    }
}
/// <summary>
/// Lazily yields the dictionary readings of <paramref name="word"/>: first as a whole
/// word (optionally without a closing Geresh), then as a known prefix followed by a
/// dictionary word.
/// </summary>
/// <param name="word">The word to look up. It is passed to the dictionary as-is.</param>
/// <returns>
/// A sequence of tokens. Whole-word matches carry <c>WordType.HEBREW</c> and are created
/// with 1.0f as the last constructor argument; prefixed matches carry
/// <c>WordType.HEBREW_WITH_PREFIX</c> and 0.9f. Because this is an iterator, no lookup
/// happens until the sequence is enumerated.
/// </returns>
public IEnumerable<HebrewToken> Lemmatize(string word)
{
    // TODO: Verify word to be non-empty and contain Hebrew characters?
    MorphData md = m_dict.Lookup(word);
    if (md == null && word.Length > 0 && word[word.Length - 1] == '\'')
    {
        // Try omitting a closing Geresh. The last character is compared ordinally on
        // purpose: a culture-sensitive EndsWith may report a match even when the final
        // UTF-16 unit is not the apostrophe, and then the wrong character would be cut.
        md = m_dict.Lookup(word.Substring(0, word.Length - 1));
    }

    if (md != null)
    {
        foreach (var result in md.Lemmas)
        {
            yield return new HebrewToken(word, 0, (DMask)(byte)result.DescFlag, result.Lemma, 1.0f)
            {
                Type = WordType.HEBREW
            };
        }
    }

    byte prefLen = 0;

    // Keep extending the candidate prefix by one character while at least two characters
    // remain beyond the current prefix (the words של, שלא for example). The check runs
    // before the prefix grows, so the looked-up remainder is at least one character long.
    while (word.Length - prefLen >= 2)
    {
        int prefixMask = m_prefixes.Lookup(word.Substring(0, ++prefLen));
        if (prefixMask == 0) // no such prefix
        {
            break;
        }

        md = m_dict.Lookup(word.Substring(prefLen));
        if (md == null || (md.Prefixes & prefixMask) <= 0)
        {
            continue;
        }

        foreach (var result in md.Lemmas)
        {
            var flags = (DMask)(byte)result.DescFlag;

            // Keep only the readings whose own prefix specifier overlaps this prefix's mask.
            if (((int)HSpell.LingInfo.dmask2ps(flags) & prefixMask) > 0)
            {
                yield return new HebrewToken(word, prefLen, flags, result.Lemma, 0.9f)
                {
                    Type = WordType.HEBREW_WITH_PREFIX
                };
            }
        }
    }
}