示例#1
0
        /// <summary>
        /// Adds a word to the in-memory tree and to the given dictionary file.
        /// </summary>
        /// <param name="word">Word</param>
        /// <param name="freq">Word's usage frequency</param>
        /// <param name="posTag">Word's pos</param>
        /// <param name="fileName">File name</param>
        /// <returns>
        /// <c>false</c> if the word is already contained with the same frequency and
        /// <c>existingPOS.Has(posTag)</c> holds, or if any exception is thrown;
        /// otherwise the result of <c>AddWordToFile</c>.
        /// </returns>
        public bool AddWord(string word, int freq, PersianPOSTag posTag, string fileName)
        {
            try
            {
                int           knownFreq;
                PersianPOSTag knownPos;
                bool          alreadyKnown = this.Contain(word, out knownFreq, out knownPos);

                if (alreadyKnown)
                {
                    bool sameEntry = knownPos.Has(posTag) && knownFreq == freq;
                    if (sameEntry)
                    {
                        // Nothing would change, so report that no word was added.
                        return false;
                    }

                    // The stored entry differs: drop the stale record before re-adding.
                    RemoveFromFile(word);
                }

                string posName = posTag.ToString();
                AddWordToMemory(word, freq, posName);
                return AddWordToFile(word, freq, posName, fileName);
            }
            catch (Exception)
            {
                // Any failure is reported to the caller as "not added".
                return false;
            }
        }
示例#2
0
 /// <summary>
 /// Adds a word to the in-memory tree. This method performs no existence
 /// check of its own before calling <c>AddWordToMemory</c>.
 /// </summary>
 /// <param name="word">Word</param>
 /// <param name="freq">Word's usage frequency</param>
 /// <param name="posTag">Word's pos</param>
 /// <returns>Always <c>true</c>; any exception raised by <c>AddWordToMemory</c> propagates to the caller.</returns>
 public bool AddWordBlind(string word, int freq, PersianPOSTag posTag)
 {
     // No try/catch here: the previous "catch { throw ex; }" only reset the
     // exception's stack trace without handling anything.
     AddWordToMemory(word, freq, posTag.ToString());
     return true;
 }
示例#3
0
        /// <summary>
        /// Creates a token with all of its attributes set.
        /// </summary>
        /// <param name="lexeme">Value stored in <c>Lexeme</c>.</param>
        /// <param name="persianPOSTag">Value stored in <c>POSTag</c>.</param>
        /// <param name="lemma">Value stored in <c>Lemma</c>.</param>
        /// <param name="numberType">Value stored in <c>Number</c>.</param>
        /// <param name="length">Value stored in <c>Length</c>.</param>
        /// <param name="startPos">Value stored in <c>StartPos</c>.</param>
        /// <param name="person">Value stored in <c>Person</c>.</param>
        public Token(string lexeme, PersianPOSTag persianPOSTag, string lemma, NumberType numberType, int length, int startPos, ENUM_TENSE_PERSON person)
        {
            // Each member is assigned exactly once; the earlier version set
            // Lexeme, POSTag and Lemma twice with the same values.
            Lexeme = lexeme;
            POSTag = persianPOSTag;
            Lemma = lemma;

            Number = numberType;
            Length = length;
            Person = person;
            StartPos = startPos;
        }
示例#4
0
        /// <summary>
        /// Creates a token with all of its attributes set.
        /// </summary>
        /// <param name="lexeme">Value stored in <c>Lexeme</c>.</param>
        /// <param name="persianPOSTag">Value stored in <c>POSTag</c>.</param>
        /// <param name="lemma">Value stored in <c>Lemma</c>.</param>
        /// <param name="numberType">Value stored in <c>Number</c>.</param>
        /// <param name="length">Value stored in <c>Length</c>.</param>
        /// <param name="startPos">Value stored in <c>StartPos</c>.</param>
        /// <param name="person">Value stored in <c>Person</c>.</param>
        public Token(string lexeme, PersianPOSTag persianPOSTag, string lemma, NumberType numberType, int length, int startPos, ENUM_TENSE_PERSON person)
        {
            // Each member is assigned exactly once; the earlier version set
            // Lexeme, POSTag and Lemma twice with the same values.
            Lexeme   = lexeme;
            POSTag   = persianPOSTag;
            Lemma    = lemma;

            Number   = numberType;
            Length   = length;
            Person   = person;
            StartPos = startPos;
        }
示例#5
0
        /// <summary>
        /// Looks a word up and reports its POS tag.
        /// </summary>
        /// <param name="word">Word</param>
        /// <param name="posTag">
        /// Receives the node's POS tag converted via <c>ToEnum&lt;PersianPOSTag&gt;()</c>
        /// when the word is found; otherwise <c>PersianPOSTag.UserPOS</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> if <c>IndexOf</c> yields a node that is marked as end of word;
        /// otherwise <c>false</c>.
        /// </returns>
        public bool Contain(string word, out PersianPOSTag posTag)
        {
            // Default reported whenever the word is not found.
            posTag = PersianPOSTag.UserPOS;

            NodeWithFreqandPOS node = IndexOf(word);
            bool isWord = node != null && node.IsEndOfWord;

            if (isWord)
            {
                posTag = node.POSTag.ToEnum<PersianPOSTag>();
            }

            return isWord;
        }
示例#6
0
        /// <summary>
        /// Registers the user-selected form of a word in the final word list,
        /// deriving its POS tag from the suffix that remains in the original word.
        /// </summary>
        /// <param name="userSelectedWord">Form of word which user select to add to dictionary</param>
        /// <param name="originalWord">Original word without lemmatization</param>
        private void AddToDictionary(string userSelectedWord, string originalWord)
        {
            // Whatever follows the first userSelectedWord.Length characters is treated as the suffix.
            string trailing = originalWord.Substring(userSelectedWord.Length);

            PersianPOSTag tag = PersianPOSTag.UserPOS;

            if (trailing.Length != 0)
            {
                // POS accepted by the suffix category, with UserPOS applied through Set.
                tag = InflectionAnalyser
                      .AcceptingPOS(InflectionAnalyser.SuffixCategory(trailing))
                      .Set(PersianPOSTag.UserPOS);
            }

            AddWordToFinalList(userSelectedWord, m_wordList[userSelectedWord], tag);
        }
示例#7
0
        /// <summary>
        /// Inserts a word into <c>m_finalList</c>, or updates its entry if it is already there.
        /// </summary>
        /// <param name="word">Key of the entry.</param>
        /// <param name="freq">Frequency stored for a new entry. For an existing entry it is
        /// not used: the stored frequency is incremented by one instead.</param>
        /// <param name="pos">POS tag stored for a new entry, or combined into the existing
        /// entry's tag through <c>Set</c>.</param>
        private void AddWordToFinalList(string word, int freq, PersianPOSTag pos)
        {
            FreqPOSPair pair;
            FreqPOSPair existing;

            // One TryGetValue replaces the former ContainsKey plus two indexer reads.
            if (m_finalList.TryGetValue(word, out existing))
            {
                pair.freq = existing.freq + 1;
                pair.pos  = existing.pos.Set(pos);

                m_finalList[word] = pair;
            }
            else
            {
                pair.freq = freq;
                pair.pos  = pos;

                m_finalList.Add(word, pair);
            }
        }
示例#8
0
        /// <summary>
        /// Picks a single tag out of a combination of possible tags.
        /// </summary>
        /// <param name="possibletags">Bitwise combination of candidate tags.</param>
        /// <returns>
        /// The first of N, P, PUNC, V, AJ, CONJ, NUM (checked in that order) whose bits are
        /// all present in <paramref name="possibletags"/>; <c>PersianPOSTag.UserPOS</c> if none is.
        /// </returns>
        private static PersianPOSTag GetMostFrequent(PersianPOSTag possibletags)
        {
            // In order of frequency
            if ((possibletags & PersianPOSTag.N) == PersianPOSTag.N)
            {
                return(PersianPOSTag.N);
            }
            if ((possibletags & PersianPOSTag.P) == PersianPOSTag.P)
            {
                return(PersianPOSTag.P);
            }
            if ((possibletags & PersianPOSTag.PUNC) == PersianPOSTag.PUNC)
            {
                return(PersianPOSTag.PUNC);
            }
            if ((possibletags & PersianPOSTag.V) == PersianPOSTag.V)
            {
                return(PersianPOSTag.V);
            }
            if ((possibletags & PersianPOSTag.AJ) == PersianPOSTag.AJ)
            {
                return(PersianPOSTag.AJ);
            }
            if ((possibletags & PersianPOSTag.CONJ) == PersianPOSTag.CONJ)
            {
                return(PersianPOSTag.CONJ);
            }
            // NOTE(review): the NUM check used to appear twice in a row; the second copy
            // was unreachable and has been removed. It may have been meant to test a
            // different tag - confirm against the tag set.
            if ((possibletags & PersianPOSTag.NUM) == PersianPOSTag.NUM)
            {
                return(PersianPOSTag.NUM);
            }

            return(PersianPOSTag.UserPOS);
        }
示例#9
0
        /// <summary>
        /// Returns the lemma of a token together with a POS tag for it.
        /// Results are cached in <c>m_LemmaDic</c>, keyed by token.
        /// </summary>
        /// <remarks>In fact, it's just a simple stemmer, not a lemmatizer.</remarks>
        /// <param name="token">Token to analyse.</param>
        /// <param name="posTag">Receives the POS tag chosen for the token.</param>
        /// <returns>
        /// The base word of the first suffix match (scanning from the last match to the
        /// first) that is a key of <c>Mapper</c>, or the token itself if there is none.
        /// </returns>
        private string GetLemma(string token, out PersianPOSTag posTag)
        {
            // Single cache lookup instead of ContainsKey followed by the indexer.
            KeyValuePair <string, PersianPOSTag> cached;
            if (m_LemmaDic.TryGetValue(token, out cached))
            {
                posTag = cached.Value;
                return(cached.Key);
            }

            string lemma = token;
            posTag = PersianPOSTag.UserPOS;

            var rpmpis = _lemmatizer.MatchForSuffix(token);

            for (int index = rpmpis.Length - 1; index >= 0; index--)
            {
                var rpmpi = rpmpis[index];

                PersianPOSTag mappedTag;
                if (Mapper.TryGetValue(rpmpi.BaseWord, out mappedTag))
                {
                    lemma = rpmpi.BaseWord;

                    // Manual rules go here, probably from a separate formatted file.
                    // Current rule: a base word mapped to N with the suffix "ی" is tagged AJ.
                    if (mappedTag == PersianPOSTag.N && rpmpi.Suffix == "ی")
                    {
                        posTag = PersianPOSTag.AJ;
                        break;
                    }

                    posTag = mappedTag;
                    break;
                }

                // Base word unknown: fall back to a tag derived from the suffix. A later
                // iteration may overwrite this value.
                PersianPOSTag possibletags = _lemmatizer.AcceptingPOS(_lemmatizer.SuffixCategory(rpmpi.Suffix));
                posTag = GetMostFrequent(possibletags);
            }
            m_LemmaDic.Add(token, new KeyValuePair <string, PersianPOSTag>(lemma, posTag));
            return(lemma);
        }
示例#10
0
 /// <summary>
 /// Creates a token from its lexeme, POS tag and lemma.
 /// </summary>
 /// <param name="lexeme">Value stored in <c>Lexeme</c>.</param>
 /// <param name="persianPOSTag">Value stored in <c>POSTag</c>.</param>
 /// <param name="lemma">Value stored in <c>Lemma</c>.</param>
 public Token(string lexeme, PersianPOSTag persianPOSTag, string lemma)
 {
     this.Lexeme = lexeme;
     this.POSTag = persianPOSTag;
     this.Lemma = lemma;
 }
示例#11
0
        /// <summary>
        /// Returns the lemma of a token together with a POS tag for it.
        /// Results are cached in <c>m_LemmaDic</c>, keyed by token.
        /// </summary>
        /// <remarks>In fact, it's just a simple stemmer, not a lemmatizer.</remarks>
        /// <param name="token">Token to analyse.</param>
        /// <param name="posTag">Receives the POS tag chosen for the token.</param>
        /// <returns>
        /// The base word of the first suffix match (scanning from the last match to the
        /// first) that is a key of <c>Mapper</c>, or the token itself if there is none.
        /// </returns>
        private string GetLemma(string token, out PersianPOSTag posTag)
        {
            // Single cache lookup instead of ContainsKey followed by the indexer.
            KeyValuePair<string, PersianPOSTag> cached;
            if (m_LemmaDic.TryGetValue(token, out cached))
            {
                posTag = cached.Value;
                return cached.Key;
            }

            string lemma = token;
            posTag = PersianPOSTag.UserPOS;

            var rpmpis = _lemmatizer.MatchForSuffix(token);
            for (int index = rpmpis.Length - 1; index >= 0; index--)
            {
                var rpmpi = rpmpis[index];

                PersianPOSTag mappedTag;
                if (Mapper.TryGetValue(rpmpi.BaseWord, out mappedTag))
                {
                    lemma = rpmpi.BaseWord;

                    // Manual rules go here, probably from a separate formatted file.
                    // Current rule: a base word mapped to N with the suffix "ی" is tagged AJ.
                    if (mappedTag == PersianPOSTag.N && rpmpi.Suffix == "ی")
                    {
                        posTag = PersianPOSTag.AJ;
                        break;
                    }

                    posTag = mappedTag;
                    break;
                }

                // Base word unknown: fall back to a tag derived from the suffix. A later
                // iteration may overwrite this value.
                PersianPOSTag possibletags = _lemmatizer.AcceptingPOS(_lemmatizer.SuffixCategory(rpmpi.Suffix));
                posTag = GetMostFrequent(possibletags);
            }
            m_LemmaDic.Add(token, new KeyValuePair<string, PersianPOSTag>(lemma, posTag));
            return lemma;
        }
示例#12
0
        /// <summary>
        /// Picks a single tag out of a combination of possible tags.
        /// </summary>
        /// <param name="possibletags">Bitwise combination of candidate tags.</param>
        /// <returns>
        /// The first of N, P, PUNC, V, AJ, CONJ, NUM (checked in that order) whose bits are
        /// all present in <paramref name="possibletags"/>; <c>PersianPOSTag.UserPOS</c> if none is.
        /// </returns>
        private static PersianPOSTag GetMostFrequent(PersianPOSTag possibletags)
        {
            // In order of frequency
            if ((possibletags & PersianPOSTag.N) == PersianPOSTag.N)
                return PersianPOSTag.N;
            if ((possibletags & PersianPOSTag.P) == PersianPOSTag.P)
                return PersianPOSTag.P;
            if ((possibletags & PersianPOSTag.PUNC) == PersianPOSTag.PUNC)
                return PersianPOSTag.PUNC;
            if ((possibletags & PersianPOSTag.V) == PersianPOSTag.V)
                return PersianPOSTag.V;
            if ((possibletags & PersianPOSTag.AJ) == PersianPOSTag.AJ)
                return PersianPOSTag.AJ;
            if ((possibletags & PersianPOSTag.CONJ) == PersianPOSTag.CONJ)
                return PersianPOSTag.CONJ;
            // NOTE(review): the NUM check used to appear twice in a row; the second copy
            // was unreachable and has been removed. It may have been meant to test a
            // different tag - confirm against the tag set.
            if ((possibletags & PersianPOSTag.NUM) == PersianPOSTag.NUM)
                return PersianPOSTag.NUM;

            return PersianPOSTag.UserPOS;
        }
示例#13
0
 /// <summary>
 /// Creates a token from its lexeme, POS tag and lemma.
 /// </summary>
 /// <param name="lexeme">Value stored in <c>Lexeme</c>.</param>
 /// <param name="persianPOSTag">Value stored in <c>POSTag</c>.</param>
 /// <param name="lemma">Value stored in <c>Lemma</c>.</param>
 public Token(string lexeme, PersianPOSTag persianPOSTag, string lemma)
 {
     this.Lexeme = lexeme;
     this.POSTag = persianPOSTag;
     this.Lemma  = lemma;
 }