Example #1
0
        /// <summary>
        /// Rebuilds the per-word tag emission counts from the capitalized and uncapitalized training words.
        /// </summary>
        /// <remarks>
        /// Both <c>WordCapitalizedTagsEmissionFrequence</c> and <c>WordTagsEmissionFrequence</c> are replaced
        /// with fresh lists. Models appear in each list in order of the word's first occurrence.
        /// Every uncapitalized occurrence is additionally passed to <c>AddTagToUnigramOccurences</c>;
        /// capitalized occurrences are not.
        /// NOTE(review): confirm that skipping the unigram count for capitalized words is intentional.
        /// </remarks>
        /// <param name="uncapitalizedWords">Word/tag pairs counted into <c>WordTagsEmissionFrequence</c>.</param>
        /// <param name="capitalizedWords">Word/tag pairs counted into <c>WordCapitalizedTagsEmissionFrequence</c>.</param>
        private void CalculateEmissionForWordTags(List<Tokenizer.WordTag> uncapitalizedWords, List<Tokenizer.WordTag> capitalizedWords)
        {
            this.WordCapitalizedTagsEmissionFrequence = new List<EmissionModel>();
            this.WordTagsEmissionFrequence            = new List<EmissionModel>();

            // A word -> model index per list replaces a linear List.Find for every token.
            var capitalizedByWord = new Dictionary<string, EmissionModel>();
            foreach (var w in capitalizedWords)
            {
                CountWordTagEmission(this.WordCapitalizedTagsEmissionFrequence, capitalizedByWord, w.word, w.tag);
            }

            var uncapitalizedByWord = new Dictionary<string, EmissionModel>();
            foreach (var w in uncapitalizedWords)
            {
                CountWordTagEmission(this.WordTagsEmissionFrequence, uncapitalizedByWord, w.word, w.tag);

                // Called once per occurrence regardless of whether the word or tag was seen before.
                this.AddTagToUnigramOccurences(w.tag);
            }
        }

        /// <summary>
        /// Adds one occurrence of <paramref name="tag"/> to the model for <paramref name="word"/>,
        /// creating the model and appending it to <paramref name="models"/> on the word's first appearance.
        /// </summary>
        /// <param name="models">Target list; receives a new model the first time a word is seen.</param>
        /// <param name="modelsByWord">Index over <paramref name="models"/> keyed by word, kept in sync here.</param>
        /// <param name="word">The word whose emission counts are updated.</param>
        /// <param name="tag">The tag observed for this occurrence of the word.</param>
        private static void CountWordTagEmission(
            List<EmissionModel> models,
            Dictionary<string, EmissionModel> modelsByWord,
            string word,
            string tag)
        {
            EmissionModel model;
            if (word == null)
            {
                // Dictionary keys cannot be null, so this rare case keeps the linear scan.
                model = models.Find(x => x.Word == null);
            }
            else
            {
                modelsByWord.TryGetValue(word, out model);
            }

            bool isNewWord = model == null;
            if (isNewWord)
            {
                model = new EmissionModel();
                model.Word = word;
            }

            // TryGetValue leaves count at 0 for an unseen tag, so one path covers insert and increment.
            int count;
            model.TagFreq.TryGetValue(tag, out count);
            model.TagFreq[tag] = count + 1;

            // Register the model only after it has been counted, matching the original ordering.
            if (isNewWord)
            {
                models.Add(model);
                if (word != null)
                {
                    modelsByWord.Add(word, model);
                }
            }
        }
 /// <summary>
 /// Returns the result of <c>HasItemsOfType&lt;EmissionBehaviorModel, T&gt;</c> applied to
 /// <c>model.behaviors</c>.
 /// </summary>
 /// <typeparam name="T">The behavior type to look for.</typeparam>
 /// <param name="model">The emission model whose behaviors are inspected.</param>
 public static bool HasBehavior<T>(this EmissionModel model) where T : Model
     => model.behaviors.HasItemsOfType<EmissionBehaviorModel, T>();
 /// <summary>
 /// Replaces <c>model.behaviors</c> with the result of
 /// <c>RemoveItemsOfType&lt;EmissionBehaviorModel, T&gt;</c>.
 /// </summary>
 /// <typeparam name="T">The behavior type to remove.</typeparam>
 /// <param name="model">The emission model whose behaviors are updated.</param>
 public static void RemoveBehaviors<T>(this EmissionModel model) where T : Model
 {
     var remaining = model.behaviors.RemoveItemsOfType<EmissionBehaviorModel, T>(model);
     model.behaviors = remaining;
 }
 /// <summary>
 /// Replaces <c>model.behaviors</c> with the result of <c>RemoveItem</c> for
 /// <paramref name="behavior"/>, then calls <c>RemoveChildDependant</c> on the model for it.
 /// </summary>
 /// <typeparam name="T">The type of the behavior being removed.</typeparam>
 /// <param name="model">The emission model whose behaviors are updated.</param>
 /// <param name="behavior">The behavior instance to remove.</param>
 public static void RemoveBehavior<T>(this EmissionModel model, T behavior) where T : Model
 {
     var remaining = model.behaviors.RemoveItem(behavior);
     model.behaviors = remaining;

     model.RemoveChildDependant(behavior);
 }
 /// <summary>
 /// Replaces <c>model.behaviors</c> with the result of <c>AddTo</c> for
 /// <paramref name="behavior"/>, then calls <c>AddChildDependant</c> on the model for it.
 /// </summary>
 /// <typeparam name="T">The type of the behavior being added.</typeparam>
 /// <param name="model">The emission model whose behaviors are updated.</param>
 /// <param name="behavior">The behavior instance to add.</param>
 public static void AddBehavior<T>(this EmissionModel model, T behavior) where T : EmissionBehaviorModel
 {
     var extended = model.behaviors.AddTo(behavior);
     model.behaviors = extended;

     model.AddChildDependant(behavior);
 }
 /// <summary>
 /// Returns the result of <c>GetItemsOfType&lt;EmissionBehaviorModel, T&gt;</c> applied to
 /// <c>model.behaviors</c>.
 /// </summary>
 /// <typeparam name="T">The behavior type to collect.</typeparam>
 /// <param name="model">The emission model whose behaviors are inspected.</param>
 public static List<T> GetBehaviors<T>(this EmissionModel model) where T : Model
     => model.behaviors.GetItemsOfType<EmissionBehaviorModel, T>();
Example #7
0
        /// <summary>
        /// Calculates tag emission frequencies for suffixes and prefixes over the capitalized and
        /// uncapitalized training words.
        /// </summary>
        /// <remarks>
        /// One new <c>EmissionModel</c> per affix is appended to each of the four output lists (the lists
        /// are not cleared first). Each word then adds one count for its tag to every affix model it matches.
        /// Matching is ordinal, so results do not depend on the current culture. Capitalized words are
        /// lower-cased with the invariant culture for prefix matching only; their suffix matching uses the
        /// word as given.
        /// </remarks>
        /// <param name="uncapitalizedWords">Word/tag pairs counted into <paramref name="suffxem"/> and <paramref name="preffxem"/>.</param>
        /// <param name="capitalizedWords">Word/tag pairs counted into <paramref name="capitalSuff"/> and <paramref name="capitalPref"/>.</param>
        /// <param name="pref">Prefixes to track.</param>
        /// <param name="suff">Suffixes to track.</param>
        /// <param name="capitalSuff">Output list receiving one model per suffix, counted over capitalized words.</param>
        /// <param name="capitalPref">Output list receiving one model per prefix, counted over capitalized words.</param>
        /// <param name="suffxem">Output list receiving one model per suffix, counted over uncapitalized words.</param>
        /// <param name="preffxem">Output list receiving one model per prefix, counted over uncapitalized words.</param>
        private void CalculateSuffixPrefixFrequence(
            List<Tokenizer.WordTag> uncapitalizedWords,
            List<Tokenizer.WordTag> capitalizedWords,
            List<string> pref,
            List<string> suff,
            List<EmissionModel> capitalSuff,
            List<EmissionModel> capitalPref,
            List<EmissionModel> suffxem,
            List<EmissionModel> preffxem)
        {
            // Seed separate models for the uncapitalized and capitalized counts of every affix.
            foreach (var item in suff)
            {
                suffxem.Add(new EmissionModel { Word = item });
                capitalSuff.Add(new EmissionModel { Word = item });
            }

            foreach (var item in pref)
            {
                preffxem.Add(new EmissionModel { Word = item });
                capitalPref.Add(new EmissionModel { Word = item });
            }

            foreach (var w in capitalizedWords)
            {
                foreach (var sfx in capitalSuff)
                {
                    if (w.word.EndsWith(sfx.Word, System.StringComparison.Ordinal))
                    {
                        IncrementAffixTagCount(sfx, w.tag);
                    }
                }

                // Lower-case once per word rather than once per prefix; the invariant culture keeps
                // the result independent of the machine locale (e.g. Turkish dotted/dotless I).
                string wordLow = w.word.ToLowerInvariant();
                foreach (var pfx in capitalPref)
                {
                    if (wordLow.StartsWith(pfx.Word, System.StringComparison.Ordinal))
                    {
                        IncrementAffixTagCount(pfx, w.tag);
                    }
                }
            }

            foreach (var w in uncapitalizedWords)
            {
                foreach (var sfx in suffxem)
                {
                    if (w.word.EndsWith(sfx.Word, System.StringComparison.Ordinal))
                    {
                        IncrementAffixTagCount(sfx, w.tag);
                    }
                }

                foreach (var pfx in preffxem)
                {
                    if (w.word.StartsWith(pfx.Word, System.StringComparison.Ordinal))
                    {
                        IncrementAffixTagCount(pfx, w.tag);
                    }
                }
            }
        }

        /// <summary>
        /// Adds one occurrence of <paramref name="tag"/> to the tag counts of <paramref name="affix"/>.
        /// </summary>
        /// <param name="affix">The suffix or prefix model whose <c>TagFreq</c> is updated.</param>
        /// <param name="tag">The tag of the word that matched the affix.</param>
        private static void IncrementAffixTagCount(EmissionModel affix, string tag)
        {
            // TryGetValue leaves count at 0 for an unseen tag, so one path covers insert and increment.
            int count;
            affix.TagFreq.TryGetValue(tag, out count);
            affix.TagFreq[tag] = count + 1;
        }