/// <summary>
/// Builds the trigram table for <paramref name="sentence"/>.
/// The text is tokenised with <c>Seperator(sentence, true)</c>; every distinct
/// (PrevPrevToken, PrevToken, CurrentToken) triple gets one entry holding its
/// occurrence count and <c>Probability = Count / Regex_Patterns.Instance.GetTotalTokens_count()</c>.
/// </summary>
/// <param name="sentence">Text to tokenise and count trigrams in.</param>
/// <returns>
/// One entry per distinct trigram, in order of first occurrence. Empty when the
/// tokeniser yields fewer than three tokens.
/// </returns>
public List <StructData_TriGram> TriGram(string sentence)
{
    List<StructData_TriGram> list = new List<StructData_TriGram>();
    ArrayList l = Seperator(sentence, true);

    for (int i = 2; i < l.Count; i++)
    {
        // All three tokens are lower-cased so that stored entries can be matched by
        // Verify_Sentence, which lower-cases every token before looking it up.
        // (Previously the current token kept its original case and could never match.)
        string curtoken = l[i].ToString().ToLower();
        string prevtoken = l[i - 1].ToString().ToLower();
        string prevprevtoken = l[i - 2].ToString().ToLower();

        // Single scan: -1 means this trigram has not been seen yet.
        int index = list.FindIndex(s => s.CurrentToken == curtoken
                                        && s.PrevToken == prevtoken
                                        && s.PrevPrevToken == prevprevtoken);

        StructData_TriGram SD;
        if (index >= 0)
        {
            SD = list[index];
            SD.Count += 1;
        }
        else
        {
            SD = new StructData_TriGram();
            SD.CurrentToken = curtoken;
            SD.PrevToken = prevtoken;
            SD.PrevPrevToken = prevprevtoken;
            SD.Count = 1;
        }

        SD.Probability = (double)SD.Count / Regex_Patterns.Instance.GetTotalTokens_count();

        if (index >= 0)
        {
            // Write the entry back explicitly: required if StructData_TriGram is a
            // value type (list[index] returned a copy), harmless if it is a class.
            list[index] = SD;
        }
        else
        {
            list.Add(SD);
        }
    }

    return list;
}
/// <summary>
/// Scores a tokenised sentence against a bigram or trigram table.
/// Every token is converted with <c>ToString()</c> and lower-cased before lookup.
/// </summary>
/// <param name="sentence">Tokens of the sentence, in order.</param>
/// <param name="nGram">
/// A list of <c>StructData_TriGram</c> when <paramref name="isTrigram"/> is true,
/// otherwise a list of <c>StructData_BiGram</c>.
/// </param>
/// <param name="isTrigram">Selects trigram (true) or bigram (false) scoring.</param>
/// <returns>
/// 0 as soon as one n-gram of the sentence is not present in the table; otherwise the
/// accumulated score. The score stays at its initial value 1 when the sentence has too
/// few tokens to form a single n-gram.
/// </returns>
/// <exception cref="InvalidCastException">
/// <paramref name="nGram"/> is not the list type selected by <paramref name="isTrigram"/>.
/// </exception>
public double Verify_Sentence(ArrayList sentence, object nGram, bool isTrigram)
{
    double probability = 1;

    if (isTrigram)
    {
        List<StructData_TriGram> trigramList = (List<StructData_TriGram>)nGram;

        for (int i = 2; i < sentence.Count; i++)
        {
            string current = sentence[i].ToString().ToLower();
            string previous = sentence[i - 1].ToString().ToLower();
            string beforePrevious = sentence[i - 2].ToString().ToLower();

            // One scan instead of Exists + Find; -1 means the trigram is unknown.
            int index = trigramList.FindIndex(s => s.CurrentToken == current
                                                   && s.PrevToken == previous
                                                   && s.PrevPrevToken == beforePrevious);
            if (index < 0)
            {
                return 0;
            }

            double prob = (double)trigramList[index].Probability;
            // NOTE(review): the update is log10(score + (-log10(p))), not a plain sum of
            // log-probabilities. Kept exactly as it was; confirm the intended formula.
            probability = Math.Log10(probability + (-Math.Log10(prob)));
        }

        return probability;
    }

    List<StructData_BiGram> bigramList = (List<StructData_BiGram>)nGram;

    for (int i = 1; i < sentence.Count; i++)
    {
        string current = sentence[i].ToString().ToLower();
        string previous = sentence[i - 1].ToString().ToLower();

        // One scan instead of Exists + Find; -1 means the bigram is unknown.
        int index = bigramList.FindIndex(s => s.CurrentToken == current
                                              && s.PrevToken == previous);
        if (index < 0)
        {
            return 0;
        }

        double prob = (double)bigramList[index].Probability;
        // NOTE(review): same non-standard update as in the trigram branch; kept as is.
        probability = Math.Log10(probability + (-Math.Log10(prob)));
    }

    return probability;
}
/// <summary>
/// Click handler: scores every sentence in the first column of <c>dataGridView1</c>
/// against the trigram list held by <c>MemoryClass</c>, rebinds the grid to a
/// Sentence/Probability table and shows an average in <c>lblAverageResult</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click_1(object sender, EventArgs e)
{
    GRAMCLASS gc = new GRAMCLASS();

    // Build the trigram list from the old corpus only when none is cached yet.
    if (MemoryClass.Instance.isEmptyTrigramList())
    {
        gc.TriGram2(MemoryClass.Instance.Get_Sentences_From_OldCorpus(), true);
    }

    DataTable mytable = new DataTable();
    mytable.Columns.Add("Sentence", typeof(string));
    mytable.Columns.Add("Probability", typeof(double));

    double SumProbability = 0;

    // The last grid row is skipped (Rows.Count - 1).
    for (int rowIndex = 0; rowIndex < (dataGridView1.Rows.Count - 1); rowIndex++)
    {
        string currentSentence = dataGridView1[0, rowIndex].Value.ToString();
        ArrayList SS = gc.Seperator_Phase4(currentSentence, false);

        double PDF = 1;
        for (int i = 2; i < SS.Count; i++)
        {
            string current = ((string)SS[i]).ToLower();
            string previous = ((string)SS[i - 1]).ToLower();
            string beforePrevious = ((string)SS[i - 2]).ToLower();

            // Search and index the SAME list. The index used to come from gc.ListTrigram,
            // which is only assigned when TriGram2 ran on this fresh GRAMCLASS instance,
            // so using it against the MemoryClass list could fail or pick the wrong entry.
            int index = MemoryClass.Instance.GetListTrigram.FindIndex(
                s => s.CurrentToken == current
                     && s.PrevToken == previous
                     && s.PrevPrevToken == beforePrevious);

            if (index >= 0)
            {
                StructData_TriGram trigrm = MemoryClass.Instance.GetListTrigram[index];
                // Each factor is the SS.Count-th root of the trigram probability.
                PDF *= Math.Pow(trigrm.Probability, (double)1 / SS.Count);
            }
            else
            {
                // An unseen trigram zeroes the sentence score.
                PDF = 0;
            }
        }

        DataRow row = mytable.NewRow();
        row[0] = currentSentence;
        row[1] = PDF;
        SumProbability += PDF;
        mytable.Rows.Add(row);
    }

    DataSet ds = new DataSet();
    ds.Tables.Add(mytable);
    dataGridView1.DataSource = ds.Tables[0].AsDataView();

    // NOTE(review): the divisor is Regex_Patterns.Instance.GetSentenceList.Count, not the
    // number of grid rows scored above - confirm the two are meant to be the same.
    lblAverageResult.Text = ((double)SumProbability / Regex_Patterns.Instance.GetSentenceList.Count).ToString();
}
/// <summary>
/// Builds the trigram table for <paramref name="sentence"/> and publishes it.
/// The text is tokenised with <c>Seperator(sentence, true)</c>; every distinct
/// (PrevPrevToken, PrevToken, CurrentToken) triple gets one entry whose
/// <c>Probability</c> is <c>Count / Get_CountOfSpecialToken(PrevToken)</c>.
/// </summary>
/// <remarks>
/// Side effects: fills the <c>MemoryClass</c> bigram list via <c>BiGram2(sentence, false)</c>
/// when it is still null, assigns the result to <c>ListTrigram</c>, and passes it to
/// <c>MemoryClass.Instance.FillTrigramList</c>.
/// </remarks>
/// <param name="sentence">Text to tokenise and count trigrams in.</param>
/// <param name="isPhase4">
/// When true the token count comes from <c>MemoryClass</c>; otherwise from <c>Regex_Patterns</c>.
/// </param>
/// <returns>One entry per distinct trigram, in order of first occurrence.</returns>
public List <StructData_TriGram> TriGram2(string sentence, bool isPhase4)
{
    List<StructData_TriGram> list = new List<StructData_TriGram>();

    if (MemoryClass.Instance.GetListBigram == null)
    {
        MemoryClass.Instance.FillBigramList(BiGram2(sentence, false));
    }

    ArrayList l = Seperator(sentence, true);
    for (int i = 2; i < l.Count; i++)
    {
        // All three tokens are lower-cased: the lookup in button3_Click_1 lower-cases
        // every token, so an entry storing a mixed-case PrevPrevToken could never match.
        string curtoken = l[i].ToString().ToLower();
        string prevtoken = l[i - 1].ToString().ToLower();
        string prevprevtoken = l[i - 2].ToString().ToLower();

        // Single scan: -1 means this trigram has not been seen yet.
        int index = list.FindIndex(s => s.CurrentToken == curtoken
                                        && s.PrevToken == prevtoken
                                        && s.PrevPrevToken == prevprevtoken);

        StructData_TriGram SD;
        if (index >= 0)
        {
            SD = list[index];
            SD.Count += 1;
        }
        else
        {
            SD = new StructData_TriGram();
            SD.CurrentToken = curtoken;
            SD.PrevToken = prevtoken;
            SD.PrevPrevToken = prevprevtoken;
            SD.Count = 1;
        }

        // The denominator is the count of the middle token, taken from the source
        // selected by isPhase4.
        if (isPhase4)
        {
            SD.Probability = (double)SD.Count / MemoryClass.Instance.Get_CountOfSpecialToken(prevtoken);
        }
        else
        {
            SD.Probability = (double)SD.Count / Regex_Patterns.Instance.Get_CountOfSpecialToken(prevtoken);
        }

        if (index >= 0)
        {
            // Write the entry back explicitly: required if StructData_TriGram is a
            // value type (list[index] returned a copy), harmless if it is a class.
            list[index] = SD;
        }
        else
        {
            list.Add(SD);
        }
    }

    ListTrigram = list;
    MemoryClass.Instance.FillTrigramList(list);
    return list;
}