/// <summary>
/// Builds the bigram table for <paramref name="sentence"/>: one entry per distinct
/// (previous token, current token) pair of adjacent tokens, carrying the number of
/// times the pair occurred and a probability equal to that count divided by
/// <c>Regex_Patterns.Instance.GetTotalTokens_count()</c>.
/// </summary>
/// <param name="sentence">Text to analyse; it is tokenized by <c>Seperator(sentence, false)</c>.</param>
/// <returns>
/// The bigram entries in first-seen order. The list is empty when the tokenizer
/// yields fewer than two tokens.
/// </returns>
public List<StructData_BiGram> BiGram(string sentence)
{
    List<StructData_BiGram> bigrams = new List<StructData_BiGram>();
    ArrayList tokens = Seperator(sentence, false);

    for (int i = 1; i < tokens.Count; i++)
    {
        // Tokens are compared in lower case, so "The" and "the" form the same bigram.
        string curtoken = tokens[i].ToString().ToLower();
        string prevtoken = tokens[i - 1].ToString().ToLower();

        // One scan locates the entry (or reports -1); the index is reused for the
        // read and the write-back instead of re-searching the list each time.
        int existingIndex = bigrams.FindIndex(s => s.CurrentToken == curtoken && s.PrevToken == prevtoken);

        if (existingIndex >= 0)
        {
            // Copy out, modify, and store back: required if StructData_BiGram is a
            // value type, harmless if it is a reference type.
            StructData_BiGram entry = bigrams[existingIndex];
            entry.Count += 1;
            entry.Probability = (double)entry.Count / Regex_Patterns.Instance.GetTotalTokens_count();
            bigrams[existingIndex] = entry;
        }
        else
        {
            StructData_BiGram entry = new StructData_BiGram();
            entry.CurrentToken = curtoken;
            entry.PrevToken = prevtoken;
            entry.Count = 1;
            entry.Probability = (double)1 / Regex_Patterns.Instance.GetTotalTokens_count();
            bigrams.Add(entry);
        }
    }

    return bigrams;
}
/// <summary>
/// Click handler that scores every sentence in column 0 of <c>dataGridView1</c>
/// against the bigram list held by <c>MemoryClass</c>, rebinds the grid to a
/// two-column (Sentence, Probability) table, and shows the summed score divided by
/// <c>Regex_Patterns.Instance.GetSentenceList.Count</c> in <c>lblAverageResult</c>.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    GRAMCLASS gramclass = new GRAMCLASS();
    if (MemoryClass.Instance.IsEmptyBigramList())
    {
        // No bigram list in memory yet: build one from the old corpus, falling back
        // to the Regex_Patterns corpus when the old corpus is empty. The old corpus
        // is fetched once and reused rather than being read twice.
        string oldCorpus = MemoryClass.Instance.Get_Sentences_From_OldCorpus();
        gramclass.BiGram2(oldCorpus == "" ? Regex_Patterns.Instance.Get_Corpus() : oldCorpus, true);
    }

    GRAMCLASS gc = new GRAMCLASS();

    DataTable mytable = new DataTable();
    mytable.Columns.Add("Sentence", typeof(string));
    mytable.Columns.Add("Probability", typeof(double));

    // Fetched after the (possible) model build above so it reflects the filled list.
    var knownBigrams = MemoryClass.Instance.GetListBigram;
    double sumProbability = 0;

    // The last grid row is skipped.
    // NOTE(review): presumably that is the blank "new row" placeholder - confirm
    // that AllowUserToAddRows is enabled, otherwise the final sentence is ignored.
    for (int rowIndex = 0; rowIndex < dataGridView1.Rows.Count - 1; rowIndex++)
    {
        // An empty unbound cell has a null Value; treat it as an empty sentence
        // instead of throwing NullReferenceException.
        object cellValue = dataGridView1[0, rowIndex].Value;
        string currentSentence = cellValue == null ? string.Empty : cellValue.ToString();

        ArrayList tokens = gc.Seperator_Phase4(currentSentence, false);
        double pdf = 1;

        for (int i = 1; i < tokens.Count; i++)
        {
            string currentToken = tokens[i].ToString().ToLower();
            string prevToken = tokens[i - 1].ToString().ToLower();

            // Single scan: -1 means the bigram was never seen.
            int matchIndex = knownBigrams.FindIndex(s => s.CurrentToken == currentToken && s.PrevToken == prevToken);
            if (matchIndex >= 0)
            {
                // Each bigram probability is raised to 1 / (token count) before
                // being multiplied in.
                // NOTE(review): a sentence has tokens.Count - 1 bigrams; confirm
                // the exponent is meant to use tokens.Count.
                pdf *= Math.Pow(knownBigrams[matchIndex].Probability, (double)1 / tokens.Count);
            }
            else
            {
                // An unseen bigram zeroes the sentence score.
                pdf = 0;
            }
        }

        DataRow row = mytable.NewRow();
        row[0] = currentSentence;
        row[1] = pdf;
        sumProbability += pdf;
        mytable.Rows.Add(row);
    }

    DataSet ds = new DataSet();
    ds.Tables.Add(mytable);
    dataGridView1.DataSource = ds.Tables[0].AsDataView();

    // NOTE(review): the divisor is the Regex_Patterns sentence count, not the number
    // of grid rows scored above - confirm the two are meant to match.
    lblAverageResult.Text = ((double)sumProbability / Regex_Patterns.Instance.GetSentenceList.Count).ToString();
}
/// <summary>
/// Scores a tokenized sentence against a bigram or trigram list.
/// </summary>
/// <param name="sentence">Tokens of the sentence; each is lower-cased via <c>ToString().ToLower()</c> before lookup.</param>
/// <param name="nGram">
/// The model to look n-grams up in: a <c>List&lt;StructData_TriGram&gt;</c> when
/// <paramref name="isTrigram"/> is true, otherwise a <c>List&lt;StructData_BiGram&gt;</c>.
/// </param>
/// <param name="isTrigram">Selects trigram (true) or bigram (false) scoring.</param>
/// <returns>
/// 0 as soon as any n-gram of the sentence is missing from the model; 1 when the
/// sentence is too short to contain an n-gram; otherwise the accumulated score.
/// </returns>
/// <exception cref="InvalidCastException">
/// <paramref name="nGram"/> is not the list type selected by <paramref name="isTrigram"/>.
/// </exception>
public double Verify_Sentence(ArrayList sentence, object nGram, bool isTrigram)
{
    double probability = 1;

    if (isTrigram)
    {
        // Cast directly; no throw-away list is allocated first.
        List<StructData_TriGram> trigramList = (List<StructData_TriGram>)nGram;

        for (int i = 2; i < sentence.Count; i++)
        {
            string currentToken = sentence[i].ToString().ToLower();
            string prevToken = sentence[i - 1].ToString().ToLower();
            string prevPrevToken = sentence[i - 2].ToString().ToLower();

            // Single scan: FindIndex returns -1 when the trigram is unknown.
            int matchIndex = trigramList.FindIndex(s => s.CurrentToken == currentToken
                                                        && s.PrevToken == prevToken
                                                        && s.PrevPrevToken == prevPrevToken);
            if (matchIndex < 0)
            {
                return 0;
            }

            double prob = (double)trigramList[matchIndex].Probability;
            // NOTE(review): this takes Log10 of (running value + -Log10(prob)) on
            // every step, i.e. the logs nest rather than sum. Kept exactly as
            // written because callers may rely on the values - confirm the intent.
            probability = Math.Log10((double)probability + (double)(-Math.Log10(prob)));
        }
    }
    else
    {
        List<StructData_BiGram> bigramList = (List<StructData_BiGram>)nGram;

        for (int i = 1; i < sentence.Count; i++)
        {
            string currentToken = sentence[i].ToString().ToLower();
            string prevToken = sentence[i - 1].ToString().ToLower();

            // Single scan: FindIndex returns -1 when the bigram is unknown.
            int matchIndex = bigramList.FindIndex(s => s.CurrentToken == currentToken
                                                       && s.PrevToken == prevToken);
            if (matchIndex < 0)
            {
                return 0;
            }

            double prob = (double)bigramList[matchIndex].Probability;
            // NOTE(review): same nested-Log10 accumulation as the trigram branch;
            // kept unchanged - confirm the intent.
            probability = Math.Log10((double)probability + (double)(-Math.Log10(prob)));
        }
    }

    return probability;
}
/// <summary>
/// Builds the bigram table for <paramref name="sentence"/> where each entry's
/// probability is its occurrence count divided by
/// <c>Get_CountOfSpecialToken(previous token)</c>, then stores the table in
/// <c>ListBigram</c> and hands it to <c>MemoryClass.Instance.FillBigramList</c>.
/// </summary>
/// <param name="sentence">Text to analyse; it is tokenized by <c>Seperator(sentence, false)</c>.</param>
/// <param name="isPhase4">
/// When true the previous-token count comes from <c>MemoryClass.Instance</c>;
/// otherwise it comes from <c>Regex_Patterns.Instance</c>.
/// </param>
/// <returns>
/// The bigram entries in first-seen order (the same list that was stored). The list
/// is empty when the tokenizer yields fewer than two tokens.
/// </returns>
public List<StructData_BiGram> BiGram2(string sentence, bool isPhase4)
{
    List<StructData_BiGram> bigrams = new List<StructData_BiGram>();
    ArrayList tokens = Seperator(sentence, false);

    for (int i = 1; i < tokens.Count; i++)
    {
        // Tokens are compared in lower case.
        string curtoken = tokens[i].ToString().ToLower();
        string prevtoken = tokens[i - 1].ToString().ToLower();

        // One scan locates the entry (or reports -1); the index is reused for the
        // read and the write-back instead of re-searching the list each time.
        int existingIndex = bigrams.FindIndex(s => s.CurrentToken == curtoken && s.PrevToken == prevtoken);

        // The denominator is the count of the previous token, taken from the
        // source selected by isPhase4. It is looked up once per pair.
        double prevTokenCount;
        if (isPhase4)
        {
            prevTokenCount = MemoryClass.Instance.Get_CountOfSpecialToken(prevtoken);
        }
        else
        {
            prevTokenCount = Regex_Patterns.Instance.Get_CountOfSpecialToken(prevtoken);
        }

        if (existingIndex >= 0)
        {
            // Known pair: bump the count and refresh the probability. The entry is
            // copied out and stored back, which is required if StructData_BiGram
            // is a value type and harmless if it is a reference type.
            StructData_BiGram entry = bigrams[existingIndex];
            entry.Count += 1;
            entry.Probability = (double)entry.Count / prevTokenCount;
            bigrams[existingIndex] = entry;
        }
        else
        {
            // First occurrence of this pair.
            StructData_BiGram entry = new StructData_BiGram();
            entry.CurrentToken = curtoken;
            entry.PrevToken = prevtoken;
            entry.Count = 1;
            entry.Probability = (double)1 / prevTokenCount;
            bigrams.Add(entry);
        }
    }

    // Publish the result on this instance and in the shared MemoryClass store.
    ListBigram = bigrams;
    MemoryClass.Instance.FillBigramList(bigrams);
    return bigrams;
}