/// <summary>
/// Converts the raw <c>finded</c> counts stored in <c>endmodel</c> into probabilities.
/// </summary>
/// <remarks>
/// When <paramref name="Prev"/> is null, each entry's probability is its count divided by the
/// sum of all counts in this model. Otherwise each entry is conditioned on the entry of
/// <paramref name="Prev"/> whose text equals the single token at index <c>Length - 2</c> of this
/// entry's text; entries with no such match keep their current probability.
/// </remarks>
/// <param name="Prev">The model used to look up context counts, or null to normalize by this model's total.</param>
/// <param name="laplas">
/// When true, applies add-one smoothing: (count + 1) / (contextCount + number of entries in
/// <paramref name="Prev"/>). Ignored when <paramref name="Prev"/> is null.
/// </param>
public void NormalizeModel(Model Prev, bool laplas)
{
    if (Prev == null)
    {
        // The total does not change while probabilities are assigned, so compute it once
        // instead of re-summing the whole model for every entry.
        var total = endmodel.Sum(ngr => ngr.finded);
        foreach (var item in endmodel)
        {
            item.probability = ((float)item.finded) / total;
        }

        return;
    }

    var comparer = new ArraysComparer();
    int vocabularySize = Prev.endmodel.Count;

    foreach (var item in endmodel)
    {
        // Context is the second-to-last token of this entry; build it once per entry rather
        // than once per candidate inside the search predicate.
        // NOTE(review): for entries longer than two tokens this is still a single token — confirm intended.
        var context = item.text.Skip(item.text.Length - 2).Take(1).ToArray();
        NGramm prevngr = Prev.endmodel.Find(ngr => comparer.Compare(context, ngr.text) == 0);
        if (prevngr == null)
        {
            continue;
        }

        if (laplas)
        {
            // The vocabulary size belongs in the denominator; without the parentheses it was
            // added to the quotient, yielding values far above 1.
            item.probability = ((float)item.finded + 1) / (prevngr.finded + vocabularySize);
        }
        else
        {
            item.probability = ((float)item.finded) / prevngr.finded;
        }
    }
}
/// <summary>
/// Finds the first entry of <c>endmodel</c> whose <c>textstr</c> contains <paramref name="word"/>.
/// Intended for unigram models only.
/// </summary>
/// <param name="word">The text to search for inside each entry's <c>textstr</c>.</param>
/// <returns>
/// A <c>Candidate</c> carrying the matching entry's text and probability, or null when no entry matches.
/// </returns>
public Candidate ExistsWord(string word)
{
    // FirstOrDefault rather than First: First throws InvalidOperationException when nothing
    // matches, which made the null-returning path unreachable.
    NGramm ngr = endmodel.FirstOrDefault(n => n.textstr.Contains(word));
    if (ngr == null)
    {
        return null;
    }

    return new Candidate() { sts = ngr.textstr, prob = ngr.probability };
}
/// <summary>
/// Records one occurrence of the n-gram given by <paramref name="token"/>.
/// </summary>
/// <remarks>
/// An n-gram already present in <c>endmodel</c> only has its <c>finded</c> counter incremented.
/// One waiting in <c>bufffer</c> is incremented and moved to <c>endmodel</c> once its counter
/// exceeds <c>Program.unknowncount</c>. An n-gram found in neither list is created and added to
/// <c>endmodel</c> when <c>Program.unknowncount</c> is 0, otherwise to <c>bufffer</c>.
/// </remarks>
/// <param name="token">The tokens making up the n-gram.</param>
public void AddToken(string[] token)
{
    NGramm accepted = endmodel.FirstOrDefault(entry => new ArraysComparer().Compare(entry.text, token) == 0);
    if (accepted != null)
    {
        accepted.finded++;
        return;
    }

    NGramm pending = bufffer.FirstOrDefault(entry => new ArraysComparer().Compare(entry.text, token) == 0);
    if (pending == null)
    {
        // Never seen before: with a zero threshold it is accepted immediately.
        NGramm fresh = new NGramm(token);
        if (Program.unknowncount != 0)
        {
            bufffer.Add(fresh);
        }
        else
        {
            endmodel.Add(fresh);
        }

        return;
    }

    pending.finded++;
    if (pending.finded > Program.unknowncount)
    {
        // Seen often enough: promote from the buffer into the model.
        endmodel.Add(pending);
        bufffer.Remove(pending);
    }
}