Esempio n. 1
0
        /// <summary>
        /// Decides whether a token is accepted, applying minimum-score thresholds to Hebrew tokens.
        /// </summary>
        /// <param name="t">Token to examine.</param>
        /// <returns>
        /// <c>false</c> for a <c>HebrewToken</c> whose score is below 0.7, or for a verb
        /// <c>HebrewToken</c> whose score is below 0.85; <c>true</c> in every other case,
        /// including any token that is not a <c>HebrewToken</c>.
        /// </returns>
        public override bool IsValidToken(Token t)
        {
            const float MinimumWordScore = 0.7f;
            const float MinimumVerbScore = 0.85f;

            HebrewToken hebrew = t as HebrewToken;
            if (hebrew == null)
            {
                // Score filtering only applies to Hebrew tokens.
                return true;
            }

            // General floor for every Hebrew word.
            if (hebrew.Score < MinimumWordScore)
            {
                return false;
            }

            // Verbs get a stricter floor (easier to get irrelevant verbs from toleration).
            bool isVerb = (hebrew.Mask & DMask.D_TYPEMASK) == DMask.D_VERB;
            return !(isVerb && hebrew.Score < MinimumVerbScore);
        }
Esempio n. 2
0
 /// <summary>
 /// Creates a HebrewValue that pairs a token with a numeric value.
 /// </summary>
 /// <param name="token">Token stored in this instance's <c>token</c> member (no null check is performed).</param>
 /// <param name="value">Value stored in this instance's <c>value</c> member.</param>
 internal HebrewValue(HebrewToken token, short value)
 {
     this.token = token;
     this.value = value;
 }
Esempio n. 3
0
 /// <summary>
 /// Creates a HebrewValue that pairs a token with a numeric value.
 /// </summary>
 /// <param name="token">Token stored in this instance's <c>token</c> member (no null check is performed).</param>
 /// <param name="value">Value stored in this instance's <c>value</c> member.</param>
 internal HebrewValue(HebrewToken token, int value) {
     this.token = token;
     this.value = value;
 }
Esempio n. 4
0
 /// <summary>
 /// Creates a NounWord from a Hebrew token: the token's <c>Text</c> is forwarded to the
 /// base constructor and the token itself is kept in the <c>ht</c> member.
 /// </summary>
 /// <param name="ht">
 /// Source token. It is dereferenced (<c>ht.Text</c>) in the base-constructor call before
 /// the body runs, so a null argument would fail there.
 /// </param>
 public NounWord(HebrewToken ht) : base(ht.Text)
 {
     this.ht = ht;
 }
Esempio n. 5
0
        /// <summary>
        /// Event handler that lemmatizes the text currently held in <c>txbCheck</c> and writes
        /// one log section per word through <c>LoggerWriteLine</c>.
        /// </summary>
        /// <remarks>
        /// If no initialized <c>StreamLemmatizer</c> is available, one is built from an hspell
        /// folder obtained via <c>SelectHSpellFolderPath</c>; the handler returns without doing
        /// anything when that call yields <c>null</c>.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnCheck_Click(object sender, EventArgs e)
        {
            // (Re)create the lemmatizer unless an initialized StreamLemmatizer already exists.
            bool haveUsableLemmatizer = m_lemmatizer != null
                                        && m_lemmatizer.IsInitialized
                                        && m_lemmatizer is StreamLemmatizer;
            if (!haveUsableLemmatizer)
            {
                string hspellPath = SelectHSpellFolderPath();
                if (hspellPath == null)
                {
                    return;
                }

                var radix = Loader.LoadDictionaryFromHSpellFolder(hspellPath, true);
                m_lemmatizer = new HebMorph.StreamLemmatizer(radix, false);
            }

            // At this point m_lemmatizer is a StreamLemmatizer; cast once and reuse.
            StreamLemmatizer streamLemmatizer = m_lemmatizer as StreamLemmatizer;
            streamLemmatizer.SetStream(new System.IO.StringReader(txbCheck.Text));

            string currentWord = string.Empty;
            List<Token> analyses = new List<Token>();

            while (streamLemmatizer.LemmatizeNextToken(out currentWord, analyses) > 0)
            {
                if (analyses.Count == 0)
                {
                    LoggerWriteLine("{0}: Unrecognized word{1}{2}", currentWord, Environment.NewLine, "------");
                    continue;
                }

                if (analyses.Count == 1 && !(analyses[0] is HebrewToken))
                {
                    string detectedKind = analyses[0].IsNumeric ? "Numeric" : "NonHebrew";
                    LoggerWriteLine("{0}: Not a Hebrew word; detected as {1}{2}{3}", currentWord,
                                    detectedKind, Environment.NewLine, "------");
                    continue;
                }

                // A heading line is printed whenever the (prefix length, text) pair changes
                // between consecutive Hebrew tokens; every Hebrew token is then printed itself.
                int lastPrefixLength = -1;
                string lastText = string.Empty;
                foreach (Token candidate in analyses)
                {
                    HebrewToken hebrew = candidate as HebrewToken;
                    if (hebrew != null)
                    {
                        bool sameHeading = lastPrefixLength == hebrew.PrefixLength
                                           && lastText.Equals(hebrew.Text);
                        if (!sameHeading)
                        {
                            lastPrefixLength = hebrew.PrefixLength;
                            lastText = hebrew.Text;
                            if (lastPrefixLength != 0)
                            {
                                LoggerWriteLine("Legal combination: {0}+{1} (score: {2})", hebrew.Text.Substring(0, lastPrefixLength),
                                                hebrew.Text.Substring(lastPrefixLength), hebrew.Score);
                            }
                            else
                            {
                                LoggerWriteLine("Legal word: {0} (score: {1})", hebrew.Text, hebrew.Score);
                            }
                        }

                        LoggerWriteLine(hebrew.ToString());
                    }
                }

                LoggerWriteLine("------");
            }
        }
Esempio n. 6
0
 /// <summary>
 /// Creates an unknownWord from a Hebrew token: the token's <c>Text</c> is forwarded to the
 /// base constructor and the token itself is kept in the <c>ht</c> member.
 /// </summary>
 /// <param name="ht">
 /// Source token. It is dereferenced (<c>ht.Text</c>) in the base-constructor call before
 /// the body runs, so a null argument would fail there.
 /// </param>
 public unknownWord(HebrewToken ht) : base(ht.Text)
 {
     this.ht = ht;
 }
Esempio n. 7
0
 /// <summary>
 /// Creates a GufWord from a Hebrew token and sets <c>word</c> to the Hebrew pronoun
 /// (personal or possessive) selected by the token's mask.
 /// </summary>
 /// <remarks>
 /// The branches are tried in order and the first match wins. Within each person the
 /// combination that also carries <c>D_NUMBASE</c> is tested before the one without it,
 /// and masculine before feminine. When nothing matches, <c>word</c> falls back to the
 /// textual form of the mask.
 /// The four feminine branches without <c>D_NUMBASE</c> test <c>D_FEMININE</c>;
 /// testing <c>D_MASCULINE</c> there would repeat an earlier condition and make them unreachable.
 /// NOTE(review): <c>HasFlag</c> only checks that all requested bits are set. If the
 /// person/number members of <c>DMask</c> are multi-bit fields rather than independent
 /// flags, earlier branches may shadow later ones — confirm against the DMask definition.
 /// NOTE(review): the possessive branches combine <c>D_O*</c> person flags with the plain
 /// <c>D_MASCULINE</c>/<c>D_FEMININE</c> gender flags, as the original code did — confirm
 /// that this is the intended gender field.
 /// </remarks>
 /// <param name="ht">
 /// Source token. It is dereferenced (<c>ht.Text</c>) in the base-constructor call, so a
 /// null argument would fail there.
 /// </param>
 public GufWord(HebrewToken ht) : base(ht.Text)
 {
     var mask = ht.Mask;

     // ----- Personal pronouns -----
     if (mask.HasFlag(DMask.D_FIRST | DMask.D_NUMBASE))
     {
         word = "אנחנו";
     }
     else if (mask.HasFlag(DMask.D_FIRST))
     {
         word = "אני";
     }
     else if (mask.HasFlag(DMask.D_SECOND | DMask.D_NUMBASE | DMask.D_MASCULINE))
     {
         word = "אתם";
     }
     else if (mask.HasFlag(DMask.D_SECOND | DMask.D_MASCULINE))
     {
         word = "אתה";
     }
     else if (mask.HasFlag(DMask.D_SECOND | DMask.D_NUMBASE | DMask.D_FEMININE))
     {
         word = "אתן";
     }
     else if (mask.HasFlag(DMask.D_SECOND | DMask.D_FEMININE))
     {
         word = "את";
     }
     else if (mask.HasFlag(DMask.D_THIRD | DMask.D_NUMBASE | DMask.D_MASCULINE))
     {
         word = "הם";
     }
     else if (mask.HasFlag(DMask.D_THIRD | DMask.D_MASCULINE))
     {
         word = "הוא";
     }
     else if (mask.HasFlag(DMask.D_THIRD | DMask.D_NUMBASE | DMask.D_FEMININE))
     {
         word = "הן";
     }
     else if (mask.HasFlag(DMask.D_THIRD | DMask.D_FEMININE))
     {
         word = "היא";
     }
     // ----- Possessive pronouns -----
     else if (mask.HasFlag(DMask.D_OFIRST | DMask.D_NUMBASE))
     {
         word = "שלנו";
     }
     else if (mask.HasFlag(DMask.D_OFIRST))
     {
         word = "שלי";
     }
     else if (mask.HasFlag(DMask.D_OSECOND | DMask.D_NUMBASE | DMask.D_MASCULINE))
     {
         word = "שלכם";
     }
     else if (mask.HasFlag(DMask.D_OSECOND | DMask.D_MASCULINE))
     {
         word = "שלך";
     }
     else if (mask.HasFlag(DMask.D_OSECOND | DMask.D_NUMBASE | DMask.D_FEMININE))
     {
         word = "שלכן";
     }
     else if (mask.HasFlag(DMask.D_OSECOND | DMask.D_FEMININE))
     {
         // Same unvowelled spelling as the masculine form.
         word = "שלך";
     }
     else if (mask.HasFlag(DMask.D_OTHIRD | DMask.D_NUMBASE | DMask.D_MASCULINE))
     {
         word = "שלהם";
     }
     else if (mask.HasFlag(DMask.D_OTHIRD | DMask.D_MASCULINE))
     {
         word = "שלו";
     }
     else if (mask.HasFlag(DMask.D_OTHIRD | DMask.D_NUMBASE | DMask.D_FEMININE))
     {
         word = "שלהן";
     }
     else if (mask.HasFlag(DMask.D_OTHIRD | DMask.D_FEMININE))
     {
         word = "שלה";
     }
     else
     {
         // No pronoun matched: fall back to the mask's textual form.
         word = mask.ToString();
     }
 }