示例#1
0
        /// <summary>
        /// Derives the candidate part-of-speech tags of a suffixed word from the tags of its base
        /// form and the suffix attached to it.
        /// </summary>
        /// <remarks>
        /// Each base tag is passed through <c>ApplyDeclension</c> once for every suffix category
        /// whose bits are all set in the category flags reported for <paramref name="suffix"/>.
        /// When several derivations yield the same tag, the largest contributing base weight is kept.
        /// The results are ordered by descending weight and the weights are divided by their sum.
        /// </remarks>
        /// <param name="baseTags">Tags of the base word.</param>
        /// <param name="baseWeights">Weights of the base tags; indexed in parallel with <paramref name="baseTags"/>.</param>
        /// <param name="suffix">The suffix attached to the base word.</param>
        /// <param name="targetTags">Receives the derived tags, highest weight first.</param>
        /// <param name="targetWeights">
        /// Receives the weights matching <paramref name="targetTags"/>, normalised so that they sum to 1.
        /// If the collected weights sum to zero they are returned un-normalised instead of as NaN/Infinity.
        /// </param>
        private void ApplyPersianDeclensionRules(PersianPartOfSpeech[] baseTags, double[] baseWeights, string suffix, out PersianPartOfSpeech[] targetTags, out double[] targetWeights)
        {
            Dictionary<PersianPartOfSpeech, double> results = new Dictionary<PersianPartOfSpeech, double>();
            PersianSuffixesCategory suffixCat = InflectionAnalyser.SuffixCategory(suffix);

            // Enum.GetValues allocates a fresh array on every call, so fetch the categories once.
            Array allCategories = Enum.GetValues(typeof(PersianSuffixesCategory));

            for (int i = 0; i < baseTags.Length; i++)
            {
                foreach (PersianSuffixesCategory cat in allCategories)
                {
                    // NOTE(review): a zero-valued category member (if the enum declares one) always
                    // passes this test - confirm that is intended.
                    if ((suffixCat & cat) != cat)
                    {
                        continue;
                    }

                    PersianPartOfSpeech pos = ApplyDeclension(baseTags[i], cat);

                    // The comparative category also covers the superlative suffix; tell them apart here.
                    if (pos == (PersianPartOfSpeech.Adjective | PersianPartOfSpeech.Comparative) && suffix.Equals("ترین"))
                    {
                        pos = PersianPartOfSpeech.Adjective | PersianPartOfSpeech.Superlative;
                    }

                    // Keep the best weight seen for each derived tag (single dictionary lookup).
                    double existing;
                    results[pos] = results.TryGetValue(pos, out existing)
                        ? Math.Max(baseWeights[i], existing)
                        : baseWeights[i];
                }
            }

            double sum = results.Values.Sum();

            // Sort once; OrderByDescending is stable, so ties keep their insertion order.
            KeyValuePair<PersianPartOfSpeech, double>[] ranked = results.OrderByDescending(r => r.Value).ToArray();

            targetTags    = new PersianPartOfSpeech[ranked.Length];
            targetWeights = new double[ranked.Length];
            for (int k = 0; k < ranked.Length; k++)
            {
                targetTags[k] = ranked[k].Key;

                // Dividing by a zero sum would produce NaN/Infinity weights; leave them as they are.
                targetWeights[k] = sum != 0 ? ranked[k].Value / sum : ranked[k].Value;
            }
        }
示例#2
0
        // TODO: Examine the rules! We might also want to add a probability to rule application.
        // TODO: Also consider revising the order of the rules.
        /// <summary>
        /// Maps the tag of a base word to the tag of the word formed by attaching a suffix of the
        /// given category.
        /// </summary>
        /// <param name="baseTag">
        /// Tag of the base word. Rules compare it for exact equality, so only the plain
        /// <c>Adjective</c>, <c>Noun</c> and <c>Unknown</c> values can trigger a rule.
        /// </param>
        /// <param name="suffixCat">A single suffix category; also compared for exact equality.</param>
        /// <returns>The derived tag, or <paramref name="baseTag"/> unchanged when no rule applies.</returns>
        private static PersianPartOfSpeech ApplyDeclension(PersianPartOfSpeech baseTag, PersianSuffixesCategory suffixCat)
        {
            bool isPluralSign = suffixCat == PersianSuffixesCategory.PluralSignHaa
                                || suffixCat == PersianSuffixesCategory.PluralSignAan;
            bool isComparative = suffixCat == PersianSuffixesCategory.ComparativeAdjectives;

            switch (baseTag)
            {
                case PersianPartOfSpeech.Adjective:
                    // Adjective + plural sign is tagged as a plural noun.
                    if (isPluralSign)
                    {
                        return PersianPartOfSpeech.Noun | PersianPartOfSpeech.Plural;
                    }
                    // Adjective + "to be" suffix is tagged as a noun.
                    if (suffixCat == PersianSuffixesCategory.ToBeVerb)
                    {
                        return PersianPartOfSpeech.Noun;
                    }
                    break;

                case PersianPartOfSpeech.Noun:
                    // Noun + YaaNesbat suffix is tagged as an adjective.
                    if (suffixCat == PersianSuffixesCategory.YaaNesbat)
                    {
                        return PersianPartOfSpeech.Adjective;
                    }
                    // Noun + comparative suffix is tagged as a comparative adjective.
                    if (isComparative)
                    {
                        return PersianPartOfSpeech.Adjective | PersianPartOfSpeech.Comparative;
                    }
                    break;

                case PersianPartOfSpeech.Unknown:
                    // With no known base tag, the tag is taken from the suffix alone.
                    if (isComparative)
                    {
                        return PersianPartOfSpeech.Adjective | PersianPartOfSpeech.Comparative;
                    }
                    if (isPluralSign)
                    {
                        return PersianPartOfSpeech.Noun | PersianPartOfSpeech.Plural;
                    }
                    break;
            }

            // No rule matched: the suffix leaves the tag as it was.
            return baseTag;
        }
示例#3
0
            /// <summary>
            /// Parses one dictionary line consisting of a lexeme and its weighted tag list, separated by a tab.
            /// </summary>
            /// <param name="line">
            /// A line of the form <c>lexeme&lt;TAB&gt;Tag:weight, Tag:weight ...</c>. Tag entries are
            /// separated by commas and/or spaces; each tag is parsed with <c>Enum.Parse</c> and each
            /// weight as a culture-invariant number (<c>'.'</c> as the decimal separator).
            /// </param>
            /// <returns>
            /// The parsed entry, or <c>null</c> when the line does not consist of exactly two
            /// non-empty tab-separated fields.
            /// </returns>
            /// <exception cref="System.ArgumentException">A tag is not a valid <c>PersianPartOfSpeech</c> value, or the same tag occurs twice.</exception>
            /// <exception cref="System.FormatException">A weight is not a valid number.</exception>
            /// <exception cref="System.IndexOutOfRangeException">A tag entry lacks its <c>:weight</c> part.</exception>
            public static POSDictionaryEntry? Parse(string line)
            {
                string[] parts = line.Split(new[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);
                if (parts.Length != 2)
                {
                    return null;
                }

                // Allocate the entry only once the line is known to have the expected shape.
                POSDictionaryEntry entry = new POSDictionaryEntry
                {
                    Lexeme = parts[0],
                    Tags   = new Dictionary<PersianPartOfSpeech, double>()
                };

                string[] elements = parts[1].Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string element in elements)
                {
                    // Split each "Tag:weight" pair once and reuse both halves.
                    string[] tagAndWeight = element.Split(new[] { ':' }, StringSplitOptions.RemoveEmptyEntries);

                    PersianPartOfSpeech pos = (PersianPartOfSpeech)Enum.Parse(typeof(PersianPartOfSpeech), tagAndWeight[0]);

                    // The dictionary file is machine-readable data, so the weight must not be parsed with
                    // the current UI culture (whose decimal separator may not be '.').
                    // NOTE(review): assumes the file was written with '.' as decimal separator - confirm against the writer.
                    double weight = double.Parse(tagAndWeight[1], System.Globalization.CultureInfo.InvariantCulture);

                    entry.Tags.Add(pos, weight);
                }

                return entry;
            }
示例#4
0
        /// <summary>
        /// Builds the part-of-speech flags that describe a conjugated verb.
        /// </summary>
        /// <param name="verbInfo">Conjugation details; its <c>Person</c>, <c>Positivity</c> and <c>Time</c> members are read.</param>
        /// <returns>
        /// <c>PersianPartOfSpeech.Verb</c> combined with the number, person, polarity and tense flags
        /// matching <paramref name="verbInfo"/>. A member whose value is not handled below contributes no flag.
        /// </returns>
        private PersianPartOfSpeech GetVerbPOS(Conjugator.VerbInfo verbInfo)
        {
            // Each aspect is resolved into its own flag (0 = nothing to add) and merged at the end.
            PersianPartOfSpeech numberFlag   = 0;
            PersianPartOfSpeech personFlag   = 0;
            PersianPartOfSpeech polarityFlag = 0;
            PersianPartOfSpeech tenseFlag    = 0;

            var person = verbInfo.Person;

            // Grammatical number.
            switch (person)
            {
                case ENUM_TENSE_PERSON.SINGULAR_FIRST:
                case ENUM_TENSE_PERSON.SINGULAR_SECOND:
                case ENUM_TENSE_PERSON.SINGULAR_THIRD:
                    numberFlag = PersianPartOfSpeech.Singular;
                    break;

                case ENUM_TENSE_PERSON.PLURAL_FIRST:
                case ENUM_TENSE_PERSON.PLURAL_SECOND:
                case ENUM_TENSE_PERSON.PLURAL_THIRD:
                    numberFlag = PersianPartOfSpeech.Plural;
                    break;
            }

            // Grammatical person.
            switch (person)
            {
                case ENUM_TENSE_PERSON.SINGULAR_FIRST:
                case ENUM_TENSE_PERSON.PLURAL_FIRST:
                    personFlag = PersianPartOfSpeech.FirstPerson;
                    break;

                case ENUM_TENSE_PERSON.SINGULAR_SECOND:
                case ENUM_TENSE_PERSON.PLURAL_SECOND:
                    personFlag = PersianPartOfSpeech.SecondPerson;
                    break;

                case ENUM_TENSE_PERSON.SINGULAR_THIRD:
                case ENUM_TENSE_PERSON.PLURAL_THIRD:
                    personFlag = PersianPartOfSpeech.ThirdPerson;
                    break;
            }

            // Polarity.
            switch (verbInfo.Positivity)
            {
                case ENUM_TENSE_POSITIVITY.POSITIVE:
                    polarityFlag = PersianPartOfSpeech.Positive;
                    break;

                case ENUM_TENSE_POSITIVITY.NEGATIVE:
                    polarityFlag = PersianPartOfSpeech.Negative;
                    break;
            }

            // Tense: every handled ENUM_TENSE_TIME member maps to the PersianPartOfSpeech flag of the same name.
            switch (verbInfo.Time)
            {
                case ENUM_TENSE_TIME.AMR:
                    tenseFlag = PersianPartOfSpeech.AMR;
                    break;

                case ENUM_TENSE_TIME.AYANDE:
                    tenseFlag = PersianPartOfSpeech.AYANDE;
                    break;

                case ENUM_TENSE_TIME.MAZI_E_BAEID:
                    tenseFlag = PersianPartOfSpeech.MAZI_E_BAEID;
                    break;

                case ENUM_TENSE_TIME.MAZI_E_BAEIDE_NAGHLI:
                    tenseFlag = PersianPartOfSpeech.MAZI_E_BAEIDE_NAGHLI;
                    break;

                case ENUM_TENSE_TIME.MAZI_E_ELTEZAMI:
                    tenseFlag = PersianPartOfSpeech.MAZI_E_ELTEZAMI;
                    break;

                case ENUM_TENSE_TIME.MAZI_E_ESTEMRARI:
                    tenseFlag = PersianPartOfSpeech.MAZI_E_ESTEMRARI;
                    break;

                case ENUM_TENSE_TIME.MAZI_E_ESTEMRARIE_NAGHLI:
                    tenseFlag = PersianPartOfSpeech.MAZI_E_ESTEMRARIE_NAGHLI;
                    break;

                case ENUM_TENSE_TIME.MAZI_E_MOSTAMAR:
                    tenseFlag = PersianPartOfSpeech.MAZI_E_MOSTAMAR;
                    break;

                case ENUM_TENSE_TIME.MAZI_E_MOSTAMARE_NAGHLI:
                    tenseFlag = PersianPartOfSpeech.MAZI_E_MOSTAMARE_NAGHLI;
                    break;

                case ENUM_TENSE_TIME.MAZI_E_SADE:
                    tenseFlag = PersianPartOfSpeech.MAZI_E_SADE;
                    break;

                case ENUM_TENSE_TIME.MAZI_E_SADEYE_NAGHLI:
                    tenseFlag = PersianPartOfSpeech.MAZI_E_SADEYE_NAGHLI;
                    break;

                case ENUM_TENSE_TIME.MOZARE_E_EKHBARI:
                    tenseFlag = PersianPartOfSpeech.MOZARE_E_EKHBARI;
                    break;

                case ENUM_TENSE_TIME.MOZARE_E_ELTEZAMI:
                    tenseFlag = PersianPartOfSpeech.MOZARE_E_ELTEZAMI;
                    break;

                case ENUM_TENSE_TIME.MOZARE_E_MOSTAMAR:
                    tenseFlag = PersianPartOfSpeech.MOZARE_E_MOSTAMAR;
                    break;
            }

            return PersianPartOfSpeech.Verb | numberFlag | personFlag | polarityFlag | tenseFlag;
        }