/// <summary>
/// Filters tokens by confidence score. Only <c>HebrewToken</c> instances are
/// inspected; every other token is accepted.
/// </summary>
/// <param name="t">The token to examine.</param>
/// <returns>
/// <c>false</c> when <paramref name="t"/> is a <c>HebrewToken</c> scoring below 0.7,
/// or a verb-typed <c>HebrewToken</c> scoring below 0.85; <c>true</c> otherwise.
/// </returns>
public override bool IsValidToken(Token t)
{
    HebrewToken hebrew = t as HebrewToken;
    if (hebrew == null)
    {
        // Not a Hebrew token (or null): nothing to filter on.
        return true;
    }

    // Baseline score every Hebrew word has to reach.
    if (hebrew.Score < 0.7f)
    {
        return false;
    }

    // Verbs must clear a stricter bar: tolerant matching tends to yield
    // irrelevant verb analyses more easily than other word types.
    bool isVerb = (hebrew.Mask & DMask.D_TYPEMASK) == DMask.D_VERB;
    return !(isVerb && hebrew.Score < 0.85f);
}
/// <summary>
/// Creates a <c>HebrewValue</c> that pairs a Hebrew token with a numeric value.
/// </summary>
/// <param name="token">The token stored in this instance.</param>
/// <param name="value">The 16-bit value stored alongside the token.</param>
internal HebrewValue(HebrewToken token, short value)
{
    // Parameters shadow the members, hence the explicit "this." qualifier.
    this.value = value;
    this.token = token;
}
/// <summary>
/// Creates a <c>HebrewValue</c> that pairs a Hebrew token with a numeric value.
/// </summary>
/// <param name="token">The token stored in this instance.</param>
/// <param name="value">The 32-bit value stored alongside the token.</param>
internal HebrewValue(HebrewToken token, int value)
{
    // Parameters shadow the members, hence the explicit "this." qualifier.
    this.value = value;
    this.token = token;
}
/// <summary>
/// Creates a <c>NounWord</c> from a Hebrew token. The token's <c>Text</c> is
/// handed to the base constructor and the token itself is kept on this instance.
/// </summary>
/// <param name="ht">The Hebrew token this word wraps.</param>
public NounWord(HebrewToken ht)
    : base(ht.Text)
{
    this.ht = ht;
}
/// <summary>
/// Click handler for the check button. Runs the text of <c>txbCheck</c> through a
/// <c>StreamLemmatizer</c> and logs, for every word read, either that it was not
/// recognized, that it is not a Hebrew word, or each Hebrew analysis returned for it.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnCheck_Click(object sender, EventArgs e)
{
    // A usable lemmatizer exists, is initialized, and is the streaming kind.
    bool lemmatizerReady = m_lemmatizer != null
        && m_lemmatizer.IsInitialized
        && (m_lemmatizer is StreamLemmatizer);

    if (!lemmatizerReady)
    {
        string hspellPath = SelectHSpellFolderPath();
        if (hspellPath == null)
        {
            // No folder selected: nothing to check against.
            return;
        }

        var radix = Loader.LoadDictionaryFromHSpellFolder(hspellPath, true);
        m_lemmatizer = new HebMorph.StreamLemmatizer(radix, false);
    }

    StreamLemmatizer streamLemmatizer = m_lemmatizer as StreamLemmatizer;
    streamLemmatizer.SetStream(new System.IO.StringReader(txbCheck.Text));

    string currentWord = string.Empty;
    List<Token> analyses = new List<Token>();

    // Keep reading for as long as the lemmatizer reports a positive result.
    while (streamLemmatizer.LemmatizeNextToken(out currentWord, analyses) > 0)
    {
        if (analyses.Count == 0)
        {
            LoggerWriteLine("{0}: Unrecognized word{1}{2}", currentWord, Environment.NewLine, "------");
            continue;
        }

        if (analyses.Count == 1 && !(analyses[0] is HebrewToken))
        {
            LoggerWriteLine("{0}: Not a Hebrew word; detected as {1}{2}{3}", currentWord, analyses[0].IsNumeric ? "Numeric" : "NonHebrew", Environment.NewLine, "------");
            continue;
        }

        // Track the last (prefix length, text) pair so the "Legal ..." header is
        // written once per run of consecutive analyses sharing the same split.
        int lastPrefixLength = -1;
        string lastText = string.Empty;

        foreach (Token candidate in analyses)
        {
            HebrewToken hebrew = candidate as HebrewToken;
            if (hebrew == null)
            {
                continue;
            }

            bool sameSplitAsPrevious = lastPrefixLength == hebrew.PrefixLength
                && lastText.Equals(hebrew.Text);

            if (!sameSplitAsPrevious)
            {
                lastPrefixLength = hebrew.PrefixLength;
                lastText = hebrew.Text;

                if (lastPrefixLength == 0)
                {
                    LoggerWriteLine("Legal word: {0} (score: {1})", hebrew.Text, hebrew.Score);
                }
                else
                {
                    // Show the word split into its prefix and the remainder.
                    LoggerWriteLine("Legal combination: {0}+{1} (score: {2})", hebrew.Text.Substring(0, lastPrefixLength), hebrew.Text.Substring(lastPrefixLength), hebrew.Score);
                }
            }

            LoggerWriteLine(hebrew.ToString());
        }

        LoggerWriteLine("------");
    }
}
/// <summary>
/// Creates an <c>unknownWord</c> from a Hebrew token. The token's <c>Text</c> is
/// handed to the base constructor and the token itself is kept on this instance.
/// </summary>
/// <param name="ht">The Hebrew token this word wraps.</param>
public unknownWord(HebrewToken ht)
    : base(ht.Text)
{
    this.ht = ht;
}
/// <summary>
/// Creates a <c>GufWord</c> from a Hebrew token. The token's <c>Text</c> is passed
/// to the base constructor, then <c>word</c> is set to the Hebrew pronoun or
/// possessive form selected by the flags in the token's <c>Mask</c>. If no flag
/// combination matches, <c>word</c> falls back to the mask's string representation.
/// </summary>
/// <remarks>
/// Branches are evaluated top to bottom and the first match wins, so the order is
/// significant. The singular-feminine branches previously repeated the
/// <c>D_MASCULINE</c> test of the branch directly before them and could never be
/// reached; they now test <c>D_FEMININE</c>, mirroring the plural-feminine branches.
/// </remarks>
/// <param name="ht">The Hebrew token whose mask selects the word.</param>
public GufWord(HebrewToken ht)
    : base(ht.Text)
{
    DMask mask = ht.Mask;

    // NOTE(review): HasFlag requires every bit of its argument to be set. If any
    // DMask person/number value spans several bits, earlier branches may shadow
    // later ones - confirm against the DMask definition.

    // Subject pronouns.
    if (mask.HasFlag(DMask.D_FIRST | DMask.D_NUMBASE))
    {
        word = "אנחנו";
    }
    else if (mask.HasFlag(DMask.D_FIRST))
    {
        word = "אני";
    }
    else if (mask.HasFlag(DMask.D_SECOND | DMask.D_NUMBASE | DMask.D_MASCULINE))
    {
        word = "אתם";
    }
    else if (mask.HasFlag(DMask.D_SECOND | DMask.D_MASCULINE))
    {
        word = "אתה";
    }
    else if (mask.HasFlag(DMask.D_SECOND | DMask.D_NUMBASE | DMask.D_FEMININE))
    {
        word = "אתן";
    }
    else if (mask.HasFlag(DMask.D_SECOND | DMask.D_FEMININE))
    {
        // Fixed: was D_MASCULINE, duplicating the "אתה" test above.
        word = "את";
    }
    else if (mask.HasFlag(DMask.D_THIRD | DMask.D_NUMBASE | DMask.D_MASCULINE))
    {
        word = "הם";
    }
    else if (mask.HasFlag(DMask.D_THIRD | DMask.D_MASCULINE))
    {
        word = "הוא";
    }
    else if (mask.HasFlag(DMask.D_THIRD | DMask.D_NUMBASE | DMask.D_FEMININE))
    {
        word = "הן";
    }
    else if (mask.HasFlag(DMask.D_THIRD | DMask.D_FEMININE))
    {
        // Fixed: was D_MASCULINE, duplicating the "הוא" test above.
        word = "היא";
    }
    // Possessive forms.
    // NOTE(review): these branches test D_MASCULINE / D_FEMININE rather than an
    // object-gender flag - presumably intentional, but verify against DMask.
    else if (mask.HasFlag(DMask.D_OFIRST | DMask.D_NUMBASE))
    {
        word = "שלנו";
    }
    else if (mask.HasFlag(DMask.D_OFIRST))
    {
        word = "שלי";
    }
    else if (mask.HasFlag(DMask.D_OSECOND | DMask.D_NUMBASE | DMask.D_MASCULINE))
    {
        word = "שלכם";
    }
    else if (mask.HasFlag(DMask.D_OSECOND | DMask.D_MASCULINE))
    {
        word = "שלך";
    }
    else if (mask.HasFlag(DMask.D_OSECOND | DMask.D_NUMBASE | DMask.D_FEMININE))
    {
        word = "שלכן";
    }
    else if (mask.HasFlag(DMask.D_OSECOND | DMask.D_FEMININE))
    {
        // Fixed: was D_MASCULINE, duplicating the masculine "שלך" test above.
        word = "שלך";
    }
    else if (mask.HasFlag(DMask.D_OTHIRD | DMask.D_NUMBASE | DMask.D_MASCULINE))
    {
        word = "שלהם";
    }
    else if (mask.HasFlag(DMask.D_OTHIRD | DMask.D_MASCULINE))
    {
        word = "שלו";
    }
    else if (mask.HasFlag(DMask.D_OTHIRD | DMask.D_NUMBASE | DMask.D_FEMININE))
    {
        word = "שלהן";
    }
    else if (mask.HasFlag(DMask.D_OTHIRD | DMask.D_FEMININE))
    {
        // Fixed: was D_MASCULINE, duplicating the "שלו" test above.
        word = "שלה";
    }
    else
    {
        // No known combination: expose the raw mask so the case is visible.
        word = mask.ToString();
    }
}