/// <summary>
/// Scores every sentence currently shown in the grid against the bigram list held by
/// <c>MemoryClass</c>, rebinds the grid to a (Sentence, Probability) table and shows the average.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void button2_Click(object sender, EventArgs e)
{
    // When no bigrams are loaded yet, run BiGram2 over the old corpus, or over the parsed
    // corpus when the old one is the empty string.
    if (MemoryClass.Instance.IsEmptyBigramList())
    {
        string oldCorpus = MemoryClass.Instance.Get_Sentences_From_OldCorpus();
        new GRAMCLASS().BiGram2(oldCorpus == "" ? Regex_Patterns.Instance.Get_Corpus() : oldCorpus, true);
    }

    GRAMCLASS gc = new GRAMCLASS();
    List<StructData_BiGram> bigrams = MemoryClass.Instance.GetListBigram;

    DataTable mytable = new DataTable();
    mytable.Columns.Add("Sentence", typeof(string));
    mytable.Columns.Add("Probability", typeof(double));

    double sumProbability = 0;

    // The last grid row is deliberately left out (Rows.Count - 1), as in the other handlers.
    for (int rowIndex = 0; rowIndex < dataGridView1.Rows.Count - 1; rowIndex++)
    {
        string currentSentence = dataGridView1[0, rowIndex].Value.ToString();
        ArrayList tokens = gc.Seperator_Phase4(currentSentence, false);

        double pdf = 1;
        for (int i = 1; i < tokens.Count; i++)
        {
            string current = tokens[i].ToString().ToLower();
            string previous = tokens[i - 1].ToString().ToLower();

            // One scan instead of Exists + FindIndex; -1 means the pair was never seen.
            int found = bigrams.FindIndex(s => s.CurrentToken == current && s.PrevToken == previous);
            if (found < 0)
            {
                // An unseen bigram zeroes the whole sentence; nothing later can change that.
                pdf = 0;
                break;
            }

            // Each bigram probability is raised to 1 / (number of tokens) before multiplying.
            pdf *= Math.Pow(bigrams[found].Probability, 1.0 / tokens.Count);
        }

        DataRow row = mytable.NewRow();
        row[0] = currentSentence;
        row[1] = pdf;
        mytable.Rows.Add(row);
        sumProbability += pdf;
    }

    DataSet ds = new DataSet();
    ds.Tables.Add(mytable);
    dataGridView1.DataSource = ds.Tables[0].AsDataView();

    // The divisor is the parsed sentence list size, not the number of grid rows scored above.
    lblAverageResult.Text = (sumProbability / Regex_Patterns.Instance.GetSentenceList.Count).ToString();
}
/// <summary>
/// Runs <c>BiGram2</c> over the concatenation of all parsed sentences, lists the resulting
/// bigrams (token, previous token, probability, count) in the grid and shows their mean probability.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void button6_Click(object sender, EventArgs e)
{
    // Sentences are joined with no separator. A StringBuilder avoids re-copying the
    // growing text once per sentence, which "+=" on a string would do.
    StringBuilder corpusText = new StringBuilder();
    foreach (string stnc in Regex_Patterns.Instance.GetSentenceList)
    {
        corpusText.Append(stnc);
    }

    DataTable mytable = new DataTable();
    mytable.Columns.Add("Token", typeof(string));
    mytable.Columns.Add("PrevToken", typeof(string));
    mytable.Columns.Add("Probility", typeof(double));
    mytable.Columns.Add("Count", typeof(int));

    List<StructData_BiGram> bigrams = new GRAMCLASS().BiGram2(corpusText.ToString(), false);

    double sumProbability = 0;
    foreach (StructData_BiGram bigram in bigrams)
    {
        DataRow r = mytable.NewRow();
        r[0] = bigram.CurrentToken;
        r[1] = bigram.PrevToken;
        r[2] = bigram.Probability;
        r[3] = bigram.Count;
        mytable.Rows.Add(r);
        sumProbability += bigram.Probability;
    }

    DataSet ds = new DataSet();
    ds.Tables.Add(mytable);
    dataGridView1.DataSource = ds.Tables[0].DefaultView;

    // Floating-point division: an empty bigram list shows NaN rather than throwing.
    lblAverageResult.Text = (sumProbability / bigrams.Count).ToString();
}
/// <summary>
/// Load handler: calls <c>Fill_MemoryClass</c>, lists the parsed sentences in the grid,
/// colours the legend labels and disables button2, button3 and button4.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void AutoChange_Load(object sender, EventArgs e)
{
    Fill_MemoryClass();

    // The instance is not used afterwards; it is still constructed so the handler
    // performs exactly the same calls as before.
    new GRAMCLASS();

    // Single-column table; the header text is Persian for "sentence".
    DataTable sentenceTable = new DataTable();
    sentenceTable.Columns.Add("جمله", typeof(string));

    foreach (string sentence in Regex_Patterns.Instance.GetSentenceList)
    {
        DataRow entry = sentenceTable.NewRow();
        entry[0] = sentence;
        sentenceTable.Rows.Add(entry);
    }

    DataSet holder = new DataSet();
    holder.Tables.Add(sentenceTable);
    dataGridView1.DataSource = holder.Tables[0].AsDataView();
    dataGridView1.AutoSizeColumnsMode = DataGridViewAutoSizeColumnsMode.Fill;

    // Legend: one colour per kind of change shown in the grid.
    lblDelete.BackColor = Color.SeaGreen;
    lblInsert.BackColor = Color.DarkKhaki;
    lblSwitch.BackColor = Color.Red;
    lblIncorrectSpell.BackColor = Color.Silver;

    // These three buttons start out disabled.
    button2.Enabled = false;
    button3.Enabled = false;
    button4.Enabled = false;
}
/// <summary>
/// Lets the user pick an HTML/MHT file, extracts its viewable text with HtmlAgilityPack,
/// re-runs the sentence parser over it and lists the resulting sentences in the grid.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void button1_Click_1(object sender, EventArgs e)
{
    string corpus = string.Empty;
    Fill_MemoryClass();

    openFileDialog1.Filter = "HtmFile (*.htm)|*.htm|Htmlfile (*.html)|*.html|MhtFile(*.mht)|*.mht";
    if (openFileDialog1.ShowDialog() == DialogResult.OK)
    {
        // The file is read once. The earlier StreamReader pass produced text that was
        // overwritten immediately, and the reader leaked if loading threw.
        HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
        using (MemoryStream buffer = new MemoryStream(File.ReadAllBytes(openFileDialog1.FileName)))
        {
            document.Load(buffer, UnicodeEncoding.UTF8);
        }

        // NOTE(review): the three search strings are written as HTML entities. In the text
        // this was restored from they appeared already decoded (the last one as three
        // consecutive double quotes, which is not valid C#). Confirm against the original revision.
        corpus = ExtractViewableTextCleaned(document.DocumentNode)
            .Replace("&gt;", "")
            .Replace("&nbsp;", "")
            .Replace("&quot;", "");
    }

    // If the dialog was cancelled, corpus is still empty and the steps below run on empty text.
    //------------ Start Process ---------
    var patterns = Regex_Patterns.Instance;
    patterns.ClearDigitList();
    patterns.ClearEndMarkList();
    patterns.ClearSignList();
    patterns.ClearSentenceList();
    patterns.ClearWordList();
    patterns.ClearCurrentSentence();
    patterns.Tape = corpus;
    patterns.HeadTape = -1;

    // Keep running the S0 step until the head reaches the last character of the tape.
    while (patterns.HeadTape < patterns.Tape.Length - 1)
    {
        new S0().Compute();
    }

    DataTable mytable = new DataTable();
    mytable.Columns.Add("sentence", typeof(string));

    sentencesOfCorpus = patterns.GetSentenceList;
    foreach (string sent in patterns.GetSentenceList)
    {
        DataRow row = mytable.NewRow();
        row[0] = sent;
        mytable.Rows.Add(row);
    }

    DataSet ds = new DataSet();
    ds.Tables.Add(mytable);
    dataGridView1.DataSource = ds.Tables[0].AsDataView();

    btnProcessTrigram.Enabled = true;
    btnProcessBigram.Enabled = true;
    btnProcessUnigram.Enabled = true;
}
/// <summary>
/// Scores every sentence currently shown in the grid against the trigram list held by
/// <c>MemoryClass</c>, rebinds the grid to a (Sentence, Probability) table and shows the average.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void button3_Click_1(object sender, EventArgs e)
{
    GRAMCLASS gc = new GRAMCLASS();

    // When no trigrams are loaded yet, run TriGram2 over the old corpus first.
    if (MemoryClass.Instance.isEmptyTrigramList())
    {
        gc.TriGram2(MemoryClass.Instance.Get_Sentences_From_OldCorpus(), true);
    }

    // The same list is searched AND indexed. Previously the index came from gc.ListTrigram
    // but was applied to MemoryClass.Instance.GetListTrigram, so it could be -1 or point
    // at an unrelated entry.
    List<StructData_TriGram> trigrams = MemoryClass.Instance.GetListTrigram;

    DataTable mytable = new DataTable();
    mytable.Columns.Add("Sentence", typeof(string));
    mytable.Columns.Add("Probability", typeof(double));

    double sumProbability = 0;

    // The last grid row is deliberately left out (Rows.Count - 1), as in the other handlers.
    for (int rowIndex = 0; rowIndex < dataGridView1.Rows.Count - 1; rowIndex++)
    {
        string currentSentence = dataGridView1[0, rowIndex].Value.ToString();
        ArrayList tokens = gc.Seperator_Phase4(currentSentence, false);

        double pdf = 1;
        for (int i = 2; i < tokens.Count; i++)
        {
            string current = ((string)tokens[i]).ToLower();
            string previous = ((string)tokens[i - 1]).ToLower();
            string beforePrevious = ((string)tokens[i - 2]).ToLower();

            // One scan instead of Exists + FindIndex; -1 means the triple was never seen.
            int found = trigrams.FindIndex(s => s.CurrentToken == current
                                                && s.PrevToken == previous
                                                && s.PrevPrevToken == beforePrevious);
            if (found < 0)
            {
                // An unseen trigram zeroes the whole sentence; nothing later can change that.
                pdf = 0;
                break;
            }

            // Each trigram probability is raised to 1 / (number of tokens) before multiplying.
            pdf *= Math.Pow(trigrams[found].Probability, 1.0 / tokens.Count);
        }

        DataRow row = mytable.NewRow();
        row[0] = currentSentence;
        row[1] = pdf;
        mytable.Rows.Add(row);
        sumProbability += pdf;
    }

    DataSet ds = new DataSet();
    ds.Tables.Add(mytable);
    dataGridView1.DataSource = ds.Tables[0].AsDataView();

    // The divisor is the parsed sentence list size, not the number of grid rows scored above.
    lblAverageResult.Text = (sumProbability / Regex_Patterns.Instance.GetSentenceList.Count).ToString();
}
/// <summary>
/// Lets the user pick an HTML/MHT file, strips its tags with <c>Form1.RemoveTags</c>,
/// re-runs the sentence parser over the text and lists the resulting sentences in the grid.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void btnOpenFile_Click(object sender, EventArgs e)
{
    string corpus = string.Empty;
    Fill_MemoryClass();

    openFileDialog1.Filter = "HtmFile (*.htm)|*.htm|Htmlfile (*.html)|*.html|MhtFile(*.mht)|*.mht";
    if (openFileDialog1.ShowDialog() == DialogResult.OK)
    {
        // "using" closes the reader even when reading throws.
        string rawText;
        using (StreamReader file = new StreamReader(openFileDialog1.FileName))
        {
            rawText = file.ReadToEnd();
        }

        // The helper form is only needed for RemoveTags and is never shown; dispose it afterwards.
        using (Form1 f1 = new Form1())
        {
            corpus = f1.RemoveTags(rawText);
        }
    }

    // If the dialog was cancelled, corpus is still empty and the steps below run on empty text.
    //------------ Start Process ---------
    var patterns = Regex_Patterns.Instance;
    patterns.ClearDigitList();
    patterns.ClearEndMarkList();
    patterns.ClearSignList();
    patterns.ClearSentenceList();
    patterns.ClearWordList();
    patterns.ClearCurrentSentence();
    patterns.Tape = corpus;
    patterns.HeadTape = -1;

    // Keep running the S0 step until the head reaches the last character of the tape.
    while (patterns.HeadTape < patterns.Tape.Length - 1)
    {
        new S0().Compute();
    }

    DataTable mytable = new DataTable();
    mytable.Columns.Add("sentence", typeof(string));

    sentencesOfCorpus = patterns.GetSentenceList;
    foreach (string sent in patterns.GetSentenceList)
    {
        DataRow row = mytable.NewRow();
        row[0] = sent;
        mytable.Rows.Add(row);
    }

    DataSet ds = new DataSet();
    ds.Tables.Add(mytable);
    dataGridView1.DataSource = ds.Tables[0].AsDataView();

    btnProcessTrigram.Enabled = true;
    btnProcessBigram.Enabled = true;
    btnProcessUnigram.Enabled = true;
}
/// <summary>
/// Scores every sentence currently shown in the grid with unigram counts from
/// <c>MemoryClass</c>, rebinds the grid to a (sentence, Probability) table and shows the average.
/// </summary>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void button4_Click_1(object sender, EventArgs e)
{
    GRAMCLASS gramclass = new GRAMCLASS();

    DataTable table = new DataTable();
    table.Columns.Add("sentence", typeof(string));
    table.Columns.Add("Probability", typeof(double));

    double sumProbability = 0;
    int countOfTotalTokens = MemoryClass.Instance.Get_CountOfTotalTokens();

    // The last grid row is deliberately left out (Rows.Count - 1), as in the other handlers.
    for (int index_row = 0; index_row < dataGridView1.Rows.Count - 1; index_row++)
    {
        string sentence = dataGridView1.Rows[index_row].Cells[0].Value.ToString();
        ArrayList al = gramclass.Seperator_Phase4(sentence, false);

        double pdf = 1;
        foreach (string token in al)
        {
            // Look the count up once and reuse it (it used to be fetched a second time).
            int count = MemoryClass.Instance.Get_CountOfSpecialToken_Unigram(token);
            if (count == -1)
            {
                // -1 is treated as "token has no count": the whole sentence scores 0.
                pdf = 0;
                break;
            }

            pdf *= (double)count / countOfTotalTokens;
        }

        DataRow row = table.NewRow();
        row[0] = sentence;
        row[1] = pdf;
        table.Rows.Add(row);
        sumProbability += pdf;
    }

    DataSet ds = new DataSet();
    ds.Tables.Add(table);
    dataGridView1.DataSource = ds.Tables[0].AsDataView();

    // The divisor is the parsed sentence list size, not the number of grid rows scored above.
    lblAverageResult.Text = (sumProbability / Regex_Patterns.Instance.GetSentenceList.Count).ToString();
}
/// <summary>
/// Picks a row of the grid and a token inside that row's sentence via
/// <c>Create_Random_index</c>, retrying until the token is classified as a word or a digit.
/// </summary>
/// <param name="al">Receives the tokens of the chosen row's sentence.</param>
/// <param name="token_index">Receives the index of the chosen token inside <paramref name="al"/>.</param>
/// <param name="row_index">Receives the index of the chosen grid row.</param>
/// <returns>The chosen token, i.e. the element of <paramref name="al"/> at <paramref name="token_index"/>.</returns>
private string Select_Token_From_GridView(out ArrayList al, out int token_index, out int row_index)
{
    GRAMCLASS gc = new GRAMCLASS();

    //------- create row_index randomly --------------------------------------
    row_index = Create_Random_index(dataGridView1.Rows.Count - 1, false);

    //------ select row randomly and separate the sentence within the row -------
    al = new ArrayList();
    al.AddRange(gc.Seperator_ChangeCorpus(dataGridView1.Rows[row_index].Cells[0].Value.ToString()));

    //------ select a token randomly from that sentence --------
    token_index = Create_Random_index(al.Count - 1, true);
    string cur_token = al[token_index].ToString();

    var markType = Regex_Patterns.Instance.GetMarkType(cur_token);
    if (markType != TypeMark.Word && markType != TypeMark.Digit)
    {
        // Retry and return the retry's token. The result used to be discarded, so the
        // rejected token was returned while the out parameters described the new pick.
        return Select_Token_From_GridView(out al, out token_index, out row_index);
    }

    return cur_token;
}
/// <summary>
/// Picks two tokens via <c>Select_Token_From_GridView</c>, swaps them between their
/// sentences, writes the rebuilt sentences back to the grid and colours both rows red.
/// </summary>
private void Switch_tokens_Corpus()
{
    //---------- first select row and token randomly --------
    ArrayList al;
    int token_index_1;
    int row_index_1;
    string cur_token = Select_Token_From_GridView(out al, out token_index_1, out row_index_1);

    //---------- second select row and token randomly --------
    ArrayList al_new;
    int token_index_2;
    int row_index_2;
    string new_token = Select_Token_From_GridView(out al_new, out token_index_2, out row_index_2);

    // Both picks can land on the same row. Work on one list then, otherwise the second
    // write to the cell would overwrite the first and only half of the swap would show.
    if (row_index_1 == row_index_2)
    {
        al_new = al;
    }

    //------ switch tokens --------
    // Replace by position in both lists. The second list used Remove(token_index_2), which
    // searches for the int as a value, removes nothing, and left the sentence one token longer.
    al[token_index_1] = new_token;
    al_new[token_index_2] = cur_token;

    //----- rebuild both sentences; every token is preceded by a single space --------
    StringBuilder new_sentence1 = new StringBuilder();
    foreach (object token in al)
    {
        new_sentence1.Append(" ").Append(token.ToString());
    }

    StringBuilder new_sentence2 = new StringBuilder();
    foreach (object token in al_new)
    {
        new_sentence2.Append(" ").Append(token.ToString());
    }

    dataGridView1.Rows[row_index_1].Cells[0].Value = new_sentence1.ToString();
    dataGridView1.Rows[row_index_2].Cells[0].Value = new_sentence2.ToString();
    dataGridView1.Rows[row_index_1].DefaultCellStyle.BackColor = Color.Red;
    dataGridView1.Rows[row_index_2].DefaultCellStyle.BackColor = Color.Red;
}
/// <summary>
/// Scores each sentence in the "grid_sentences" grid of <paramref name="f"/> against a bigram
/// or trigram model built from the parsed corpus, then shows the results in this form's grid
/// sorted by ascending probability, together with the average.
/// </summary>
/// <param name="f">Form that owns a <c>DataGridView</c> control keyed "grid_sentences".</param>
/// <param name="ds">DataSet the result table is added to.</param>
/// <param name="isTrigram"><c>true</c> to score with trigrams, <c>false</c> for bigrams.</param>
private void Show_Phase4_TO_GridView(Form f, DataSet ds, bool isTrigram)
{
    DataTable table1 = new DataTable();
    table1.Columns.Add("Setntence", typeof(string));
    table1.Columns.Add("Probability", typeof(double));

    DataGridView dg2 = (DataGridView)f.Controls[f.Controls.IndexOfKey("grid_sentences")];
    GRAMCLASS gl = new GRAMCLASS();

    // Join all sentences with no separator. A StringBuilder avoids re-copying the
    // growing text once per sentence.
    StringBuilder corpusText = new StringBuilder();
    foreach (string sentence in Regex_Patterns.Instance.GetSentenceList)
    {
        corpusText.Append(sentence);
    }
    string loweredCorpus = corpusText.ToString().ToLower();

    // Only the model selected by isTrigram is built; the other list stays empty.
    List<StructData_BiGram> bigram = new List<StructData_BiGram>();
    List<StructData_TriGram> trigram = new List<StructData_TriGram>();
    if (isTrigram)
    {
        trigram = gl.TriGram(loweredCorpus);
    }
    else
    {
        bigram = gl.BiGram(loweredCorpus);
    }

    double Avg = 0;
    foreach (DataGridViewRow row in dg2.Rows)
    {
        // Rows without a value in the first cell are skipped.
        if (row.Cells[0].Value == null)
        {
            continue;
        }

        string sentence = row.Cells[0].Value.ToString();
        ArrayList separated = gl.Seperator_Phase4(sentence, isTrigram);

        double currentProbability = isTrigram
            ? gl.Verify_Sentence(separated, trigram, isTrigram)
            : gl.Verify_Sentence(separated, bigram, isTrigram);

        DataRow r = table1.NewRow();
        r[0] = sentence;
        r[1] = currentProbability;
        table1.Rows.Add(r);
        Avg += currentProbability;
    }

    ds.Tables.Add(table1);

    // View the table that was just filled, instead of assuming it sits at index 1 of ds.
    DataView dv = table1.AsDataView();
    dv.Sort = "Probability ASC";
    dataGridView1.DataSource = dv;

    lblAverageResult.Text = (Avg / (dataGridView1.Rows.Count - 1)).ToString();
}
/// <summary>
/// Lists bigrams or trigrams of the parsed sentences in the grid and shows the average of
/// their probabilities.
/// </summary>
/// <param name="isTriGram"><c>true</c> to list trigrams (adds a PrevPrevToken column), <c>false</c> for bigrams.</param>
/// <param name="isCorpus">
/// <c>true</c> to build the n-grams once over all sentences joined together;
/// <c>false</c> to build them separately for each sentence and list them one after another.
/// </param>
private void BindDataToDataGridView(bool isTriGram, bool isCorpus)
{
    DataTable mytable = new DataTable();
    mytable.Columns.Add("Token", typeof(string));
    mytable.Columns.Add("PrevToken", typeof(string));
    if (isTriGram)
    {
        mytable.Columns.Add("PrevPrevToken", typeof(string));
    }
    mytable.Columns.Add("Probility", typeof(double));
    mytable.Columns.Add("Count", typeof(int));

    double Avg = 0;
    if (isCorpus)
    {
        // Join all sentences with no separator. A StringBuilder avoids re-copying the
        // growing text once per sentence.
        StringBuilder corpusText = new StringBuilder();
        foreach (string sent in Regex_Patterns.Instance.GetSentenceList)
        {
            corpusText.Append(sent);
        }

        Avg = AppendGramRows(mytable, corpusText.ToString(), isTriGram, Avg);
    }
    else
    {
        foreach (string sentence in Regex_Patterns.Instance.GetSentenceList)
        {
            Avg = AppendGramRows(mytable, sentence, isTriGram, Avg);
        }
    }

    DataSet ds = new DataSet();
    ds.Tables.Add(mytable);
    dataGridView1.DataSource = ds.Tables[0].DefaultView;
    lblAverageResult.Text = (Avg / (dataGridView1.Rows.Count - 1)).ToString();
}

/// <summary>
/// Builds the bigrams or trigrams of <paramref name="text"/>, appends one row per n-gram to
/// <paramref name="table"/> and returns <paramref name="runningTotal"/> plus their probabilities.
/// </summary>
/// <param name="table">Table whose columns match the layout chosen by <paramref name="isTriGram"/>.</param>
/// <param name="text">Text handed to <c>GRAMCLASS.TriGram</c> or <c>GRAMCLASS.BiGram</c>.</param>
/// <param name="isTriGram"><c>true</c> for trigram rows (five columns), <c>false</c> for bigram rows (four).</param>
/// <param name="runningTotal">Probability sum accumulated so far; added to in row order.</param>
/// <returns>The updated probability sum.</returns>
private double AppendGramRows(DataTable table, string text, bool isTriGram, double runningTotal)
{
    if (isTriGram)
    {
        foreach (StructData_TriGram gram in new GRAMCLASS().TriGram(text))
        {
            DataRow r = table.NewRow();
            r[0] = gram.CurrentToken;
            r[1] = gram.PrevToken;
            r[2] = gram.PrevPrevToken;
            r[3] = gram.Probability;
            r[4] = gram.Count;
            table.Rows.Add(r);
            runningTotal += gram.Probability;
        }
    }
    else
    {
        foreach (StructData_BiGram gram in new GRAMCLASS().BiGram(text))
        {
            DataRow r = table.NewRow();
            r[0] = gram.CurrentToken;
            r[1] = gram.PrevToken;
            r[2] = gram.Probability;
            r[3] = gram.Count;
            table.Rows.Add(r);
            runningTotal += gram.Probability;
        }
    }

    return runningTotal;
}