/// <summary>
/// Derives the candidate part-of-speech tags of a suffixed word from the candidate
/// tags of its base form and the suffix attached to it.
/// </summary>
/// <param name="baseTags">Candidate tags of the base word.</param>
/// <param name="baseWeights">Weights parallel to <paramref name="baseTags"/>; element <c>i</c> belongs to tag <c>i</c>.</param>
/// <param name="suffix">The suffix attached to the base word.</param>
/// <param name="targetTags">The distinct resulting tags, ordered by descending weight.</param>
/// <param name="targetWeights">
/// Weights parallel to <paramref name="targetTags"/>, each divided by the sum of all resulting weights.
/// </param>
/// <remarks>
/// When several (base tag, suffix category) pairs produce the same resulting tag, the largest
/// contributing base weight is kept. If the kept weights sum to zero, the division yields NaN.
/// </remarks>
private void ApplyPersianDeclensionRules(PersianPartOfSpeech[] baseTags, double[] baseWeights, string suffix, out PersianPartOfSpeech[] targetTags, out double[] targetWeights)
{
    PersianSuffixesCategory suffixCat = InflectionAnalyser.SuffixCategory(suffix);

    // The set of categories contained in the suffix's category mask does not depend on the
    // base tag, so compute it once instead of re-scanning the enum for every base tag.
    PersianSuffixesCategory[] matchingCategories = Enum.GetValues(typeof(PersianSuffixesCategory))
        .Cast<PersianSuffixesCategory>()
        .Where(cat => (suffixCat & cat) == cat)
        .ToArray();

    Dictionary<PersianPartOfSpeech, double> results = new Dictionary<PersianPartOfSpeech, double>();

    for (int i = 0; i < baseTags.Length; i++)
    {
        foreach (PersianSuffixesCategory cat in matchingCategories)
        {
            PersianPartOfSpeech pos = ApplyDeclension(baseTags[i], cat);

            // The declension rules only produce the comparative reading; the suffix "ترین"
            // turns that reading into the superlative one.
            if (pos == (PersianPartOfSpeech.Adjective | PersianPartOfSpeech.Comparative) && suffix.Equals("ترین"))
            {
                pos = PersianPartOfSpeech.Adjective | PersianPartOfSpeech.Superlative;
            }

            // Keep the strongest weight seen for each resulting tag (single dictionary lookup).
            double existingWeight;
            if (results.TryGetValue(pos, out existingWeight))
            {
                results[pos] = Math.Max(baseWeights[i], existingWeight);
            }
            else
            {
                results.Add(pos, baseWeights[i]);
            }
        }
    }

    double sum = results.Values.Sum();

    // Sort once and project both outputs from the same ordering so that the two arrays
    // are guaranteed to stay parallel.
    KeyValuePair<PersianPartOfSpeech, double>[] ranked = results.OrderByDescending(result => result.Value).ToArray();
    targetTags = ranked.Select(result => result.Key).ToArray();
    targetWeights = ranked.Select(result => result.Value / sum).ToArray();
}
// TODO: Examine the rules! We also might want to add probability to rule application.
// TODO: Also consider revising the order of rules.
/// <summary>
/// Maps the tag of a base word to the tag of the word formed by attaching a suffix of the
/// given category.
/// </summary>
/// <param name="baseTag">Tag of the base word; rules match on exact equality with a single tag.</param>
/// <param name="suffixCat">A single suffix category; rules match on exact equality.</param>
/// <returns>
/// The tag produced by the first applicable rule, or <paramref name="baseTag"/> unchanged
/// when no rule applies.
/// </returns>
private static PersianPartOfSpeech ApplyDeclension(PersianPartOfSpeech baseTag, PersianSuffixesCategory suffixCat)
{
    bool isPluralSuffix = suffixCat == PersianSuffixesCategory.PluralSignHaa
                          || suffixCat == PersianSuffixesCategory.PluralSignAan;
    bool isComparativeSuffix = suffixCat == PersianSuffixesCategory.ComparativeAdjectives;

    // Adjective bases: a plural sign yields a plural noun, a to-be verb suffix yields a noun.
    if (baseTag == PersianPartOfSpeech.Adjective)
    {
        if (isPluralSuffix)
        {
            return PersianPartOfSpeech.Noun | PersianPartOfSpeech.Plural;
        }

        if (suffixCat == PersianSuffixesCategory.ToBeVerb)
        {
            return PersianPartOfSpeech.Noun;
        }
    }

    // Noun bases: YaaNesbat yields an adjective, a comparative suffix a comparative adjective.
    if (baseTag == PersianPartOfSpeech.Noun)
    {
        if (suffixCat == PersianSuffixesCategory.YaaNesbat)
        {
            return PersianPartOfSpeech.Adjective;
        }

        if (isComparativeSuffix)
        {
            return PersianPartOfSpeech.Adjective | PersianPartOfSpeech.Comparative;
        }
    }

    // Unknown bases: the suffix alone decides the tag.
    if (baseTag == PersianPartOfSpeech.Unknown)
    {
        if (isComparativeSuffix)
        {
            return PersianPartOfSpeech.Adjective | PersianPartOfSpeech.Comparative;
        }

        if (isPluralSuffix)
        {
            return PersianPartOfSpeech.Noun | PersianPartOfSpeech.Plural;
        }
    }

    return baseTag;
}
/// <summary>
/// Parses one dictionary line consisting of a lexeme and a tag list separated by a tab,
/// where the tag list holds <c>Tag:weight</c> items separated by commas and/or spaces.
/// </summary>
/// <param name="line">The line to parse.</param>
/// <returns>
/// The parsed entry, or <c>null</c> when the line does not contain exactly two non-empty
/// tab-separated fields.
/// </returns>
/// <exception cref="ArgumentException">
/// A tag name is not a member of <see cref="PersianPartOfSpeech"/>, or the same tag occurs twice.
/// </exception>
/// <exception cref="IndexOutOfRangeException">An item has no <c>:weight</c> part.</exception>
/// <exception cref="FormatException">A weight is not a valid number.</exception>
public static POSDictionaryEntry? Parse(string line)
{
    string[] parts = line.Split(new[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);
    if (parts.Length != 2)
    {
        return null;
    }

    POSDictionaryEntry entry = new POSDictionaryEntry
    {
        Lexeme = parts[0],
        Tags = new Dictionary<PersianPartOfSpeech, double>()
    };

    string[] elements = parts[1].Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string element in elements)
    {
        // Split each "Tag:weight" item once and reuse both halves.
        string[] tagAndWeight = element.Split(new[] { ':' }, StringSplitOptions.RemoveEmptyEntries);

        PersianPartOfSpeech pos = (PersianPartOfSpeech)Enum.Parse(typeof(PersianPartOfSpeech), tagAndWeight[0]);

        // The dictionary is machine-readable data, so weights must be parsed independently of
        // the current thread culture (which may use a different decimal separator).
        double weight = double.Parse(tagAndWeight[1], System.Globalization.CultureInfo.InvariantCulture);

        entry.Tags.Add(pos, weight);
    }

    return entry;
}
/// <summary>
/// Builds the part-of-speech flag set of a conjugated verb from its person, positivity
/// and tense information.
/// </summary>
/// <param name="verbInfo">Conjugation information of the verb.</param>
/// <returns>
/// <see cref="PersianPartOfSpeech.Verb"/> combined with the number, person, positivity and
/// tense flags that correspond to <paramref name="verbInfo"/>; a feature whose value is not
/// one of the handled enum members contributes no flag.
/// </returns>
private PersianPartOfSpeech GetVerbPOS(Conjugator.VerbInfo verbInfo)
{
    var personCode = verbInfo.Person;

    // Grammatical number (singular / plural).
    PersianPartOfSpeech number = 0;
    switch (personCode)
    {
        case ENUM_TENSE_PERSON.SINGULAR_FIRST:
        case ENUM_TENSE_PERSON.SINGULAR_SECOND:
        case ENUM_TENSE_PERSON.SINGULAR_THIRD:
            number = PersianPartOfSpeech.Singular;
            break;
        case ENUM_TENSE_PERSON.PLURAL_FIRST:
        case ENUM_TENSE_PERSON.PLURAL_SECOND:
        case ENUM_TENSE_PERSON.PLURAL_THIRD:
            number = PersianPartOfSpeech.Plural;
            break;
    }

    // Grammatical person (first / second / third).
    PersianPartOfSpeech person = 0;
    switch (personCode)
    {
        case ENUM_TENSE_PERSON.SINGULAR_FIRST:
        case ENUM_TENSE_PERSON.PLURAL_FIRST:
            person = PersianPartOfSpeech.FirstPerson;
            break;
        case ENUM_TENSE_PERSON.SINGULAR_SECOND:
        case ENUM_TENSE_PERSON.PLURAL_SECOND:
            person = PersianPartOfSpeech.SecondPerson;
            break;
        case ENUM_TENSE_PERSON.SINGULAR_THIRD:
        case ENUM_TENSE_PERSON.PLURAL_THIRD:
            person = PersianPartOfSpeech.ThirdPerson;
            break;
    }

    // Positive / negative form.
    PersianPartOfSpeech polarity = 0;
    var positivity = verbInfo.Positivity;
    if (positivity == ENUM_TENSE_POSITIVITY.POSITIVE)
    {
        polarity = PersianPartOfSpeech.Positive;
    }
    else if (positivity == ENUM_TENSE_POSITIVITY.NEGATIVE)
    {
        polarity = PersianPartOfSpeech.Negative;
    }

    // Tense: each handled ENUM_TENSE_TIME member maps to the same-named flag.
    PersianPartOfSpeech tense = 0;
    switch (verbInfo.Time)
    {
        case ENUM_TENSE_TIME.AMR:
            tense = PersianPartOfSpeech.AMR;
            break;
        case ENUM_TENSE_TIME.AYANDE:
            tense = PersianPartOfSpeech.AYANDE;
            break;
        case ENUM_TENSE_TIME.MAZI_E_BAEID:
            tense = PersianPartOfSpeech.MAZI_E_BAEID;
            break;
        case ENUM_TENSE_TIME.MAZI_E_BAEIDE_NAGHLI:
            tense = PersianPartOfSpeech.MAZI_E_BAEIDE_NAGHLI;
            break;
        case ENUM_TENSE_TIME.MAZI_E_ELTEZAMI:
            tense = PersianPartOfSpeech.MAZI_E_ELTEZAMI;
            break;
        case ENUM_TENSE_TIME.MAZI_E_ESTEMRARI:
            tense = PersianPartOfSpeech.MAZI_E_ESTEMRARI;
            break;
        case ENUM_TENSE_TIME.MAZI_E_ESTEMRARIE_NAGHLI:
            tense = PersianPartOfSpeech.MAZI_E_ESTEMRARIE_NAGHLI;
            break;
        case ENUM_TENSE_TIME.MAZI_E_MOSTAMAR:
            tense = PersianPartOfSpeech.MAZI_E_MOSTAMAR;
            break;
        case ENUM_TENSE_TIME.MAZI_E_MOSTAMARE_NAGHLI:
            tense = PersianPartOfSpeech.MAZI_E_MOSTAMARE_NAGHLI;
            break;
        case ENUM_TENSE_TIME.MAZI_E_SADE:
            tense = PersianPartOfSpeech.MAZI_E_SADE;
            break;
        case ENUM_TENSE_TIME.MAZI_E_SADEYE_NAGHLI:
            tense = PersianPartOfSpeech.MAZI_E_SADEYE_NAGHLI;
            break;
        case ENUM_TENSE_TIME.MOZARE_E_EKHBARI:
            tense = PersianPartOfSpeech.MOZARE_E_EKHBARI;
            break;
        case ENUM_TENSE_TIME.MOZARE_E_ELTEZAMI:
            tense = PersianPartOfSpeech.MOZARE_E_ELTEZAMI;
            break;
        case ENUM_TENSE_TIME.MOZARE_E_MOSTAMAR:
            tense = PersianPartOfSpeech.MOZARE_E_MOSTAMAR;
            break;
    }

    return PersianPartOfSpeech.Verb | number | person | polarity | tense;
}