/// <summary>
/// Click handler that lets the user pick a stop-word file with
/// <c>openFileDialog2</c>, reads it as UTF-8 and stores the result in
/// <c>Stopword</c>. Updates <c>lbTxt</c> and the <c>_sword</c> flag to reflect
/// whether the chosen file had any content.
/// </summary>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button8_Click(object sender, EventArgs e)
{
    if (openFileDialog2.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    // The stop-word file is read exactly once and does not depend on the
    // selection held by any other dialog: looping over openFileDialog1.FileNames
    // here would skip the load entirely when that selection is empty and would
    // repeat the read (and the warning box) once per selected file otherwise.
    string content;
    using (var reader = new StreamReader(openFileDialog2.FileName, Encoding.UTF8))
    {
        content = reader.ReadToEnd();
    }

    Stopword = new Stopword(content);

    if (content != "")
    {
        lbTxt.Text = @"đã có stopword";
        _sword = true;
    }
    else
    {
        // Empty file: tell the user to pick another one and mark stop words as missing.
        MessageBox.Show(@"File rổng chọn lại", @"Select File Stopword", MessageBoxButtons.OK, MessageBoxIcon.Stop);
        _sword = false;
        lbTxt.Text = @"chưa có stopword";
    }
}
/// <summary>
/// Builds a document from its title and raw text: stores the lower-cased
/// content, tokenises the text with a regular expression, records the total
/// token count, removes stop words and stores the sorted, stemmed terms in
/// <c>ListWorld</c>.
/// </summary>
/// <param name="title">Document title, stored unchanged in <c>Title</c>.</param>
/// <param name="content">Raw document text.</param>
/// <param name="st">Stop-word source; may be null, in which case nothing is filtered.</param>
/// <param name="rg">Token pattern; when null the default <c>[A-Za-z\-]+</c> is used.</param>
/// <param name="s">Not used by this constructor.</param>
public document(string title, string content, Stopword st, string rg, int s)
{
    Title = title;
    Content = content.ToLower();

    IStemmer stemmer = new EnglishStemmer();
    var regex = rg ?? @"[A-Za-z\-]+";

    // Tokenise once and lower-case immediately; the same list feeds both the
    // raw token count and the stop-word filtering below.
    var tokens = Regex.Matches(content, regex)
        .Cast<Match>()
        .Select(match => match.Value.ToLower())
        .ToList();
    count = tokens.Count;

    // A missing stop-word list means "filter nothing" instead of a
    // NullReferenceException.
    if (st != null && st.Lst != null)
    {
        StopWords = st.Lst.ToArray();
    }
    else
    {
        StopWords = new string[0];
    }

    // Except() is a set difference, so duplicate tokens collapse here; the
    // surviving distinct tokens are sorted and then stemmed.
    ListWorld = tokens
        .Except(StopWords)
        .OrderBy(a => a)
        .Select(word => stemmer.Stem(word))
        .ToList();
}
/// <summary>
/// Creates the form: builds the designer-generated controls, configures the
/// selection mode of two file dialogs and initialises the form's data table
/// and stop-word reference.
/// </summary>
public Form1()
{
    // Must run first: the dialogs configured below are created here.
    InitializeComponent();

    // openFileDialog1 accepts several files at once, openFileDialog5 only one.
    openFileDialog1.Multiselect = true;
    openFileDialog5.Multiselect = false;

    // Start with an empty table and no stop-word list loaded.
    DTable = new DataTable();
    Stopword = null;
}
/// <summary>
/// Builds a query from a tab-separated line of the form "id&lt;TAB&gt;title".
/// The title is tokenised, stop words are removed, the remaining tokens are
/// stemmed, and each distinct term is stored in <c>Lstq</c> together with its
/// occurrence count divided by the number of distinct terms.
/// </summary>
/// <param name="s">Input line; must contain exactly two tab-separated fields,
/// otherwise the query is left with an empty <c>Lstq</c>.</param>
/// <param name="sw">Stop-word source; may be null, in which case nothing is filtered.</param>
/// <param name="e">Not used by this constructor.</param>
/// <param name="rg">Token pattern; when null the default <c>[A-Za-z\-]+</c> is used.</param>
public Query(string s, Stopword sw, int e, string rg)
{
    Lstq = new List<Tuple<string, double>>();

    var fields = s.Split(new[] { "\t" }, StringSplitOptions.None);
    if (fields.Length != 2)
    {
        return;
    }

    IdQuery = fields[0];
    Title = fields[1];

    var regex = rg ?? @"[A-Za-z\-]+";
    IStemmer stemmer = new EnglishStemmer();

    // Lower-case before filtering so that capitalised words ("The", "What")
    // are matched against the stop-word list as well.
    IEnumerable<string> tokens = Regex.Matches(fields[1], regex)
        .Cast<Match>()
        .Select(match => match.Value.ToLower());

    // Explicit check instead of catching every exception: a missing stop-word
    // list simply means "filter nothing", while genuine failures still surface.
    // Note that Except() is a set difference and therefore also drops
    // duplicate tokens on this path.
    if (sw != null && sw.Lst != null)
    {
        tokens = tokens.Except(sw.Lst.ToArray());
    }

    var terms = tokens
        .OrderBy(a => a)
        .Select(token => stemmer.Stem(token))
        .ToList();

    // Occurrences per stemmed term; different tokens can share one stem.
    Dictionary<string, int> counts = terms
        .GroupBy(x => x)
        .ToDictionary(g => g.Key, g => g.Count());

    // The weight is normalised by the number of distinct terms.
    var distinctTerms = counts.Count;
    foreach (var item in counts)
    {
        Lstq.Add(new Tuple<string, double>(item.Key, (double)item.Value / distinctTerms));
    }
}
/// <summary>
/// Parses a boolean query string. Tokens are extracted with the pattern
/// <c>[A-Za-z\-()]+</c>, optionally filtered against the stop-word list, then
/// lower-cased and stemmed. Every token that is not "and", "or", "not", "(" or
/// ")" is treated as an operand: it is appended to <c>Lst1</c>, recorded in
/// <c>lst</c> with a running index, and replaced in <c>Lst</c> by that index
/// written as a string.
/// </summary>
/// <param name="s">Query text.</param>
/// <param name="sw">Stop-word source; when null no filtering is applied.</param>
public QueryAON(string s, Stopword sw)
{
    lst = new List<Tuple<string, int>>();
    Lst1 = new List<string>();

    IStemmer stemmer = new EnglishStemmer();
    IEnumerable<string> rawTokens = Regex.Matches(s, @"[A-Za-z\-()]+")
        .Cast<Match>()
        .Select(match => match.Value);

    if (sw != null)
    {
        // Upper-case AND / OR / NOT entries are taken out of the stop-word
        // list before filtering. Filtering happens on the original casing,
        // and Except() is a set difference, so duplicates collapse here too.
        var stopWords = sw.Lst
            .Where(a => !(a == "AND" || a == "OR" || a == "NOT"))
            .ToArray();
        rawTokens = rawTokens.Except(stopWords);
    }

    Lst = rawTokens
        .Select(token => stemmer.Stem(token.ToLower()))
        .ToList();

    // Tokens left untouched in Lst. Only a token that is exactly "(" or ")"
    // counts as a parenthesis; the pattern also lets parentheses be part of
    // a longer token.
    var reserved = new[] { "and", "or", "not", "(", ")" };

    for (var position = 0; position < Lst.Count; position++)
    {
        var term = Lst[position];
        if (reserved.Contains(term))
        {
            continue;
        }

        // The operand index is its position among the operands seen so far.
        var operandIndex = Lst1.Count;
        lst.Add(new Tuple<string, int>(term, operandIndex));
        Lst1.Add(term);
        Lst[position] = operandIndex.ToString();
    }
}
/// <summary>
/// Builds a query from free text. The text is lower-cased and tokenised, stop
/// words are removed, the remaining tokens are sorted and stemmed, and each
/// distinct term is stored in <c>Lstq</c> together with its relative
/// frequency (occurrences divided by the total number of terms).
/// </summary>
/// <param name="s">Query text.</param>
/// <param name="sw">Stop-word source; may be null, in which case nothing is filtered.</param>
/// <param name="rg">Token pattern; when null the default <c>[A-Za-z\-]+</c> is used.</param>
public Query(string s, Stopword sw, string rg)
{
    var regex = rg ?? @"[A-Za-z\-]+";
    IStemmer stemmer = new EnglishStemmer();

    // Tokens come from already lower-cased text, so no further case folding
    // is needed before filtering or stemming.
    IEnumerable<string> tokens = Regex.Matches(s.ToLower(), regex)
        .Cast<Match>()
        .Select(match => match.Value);

    Lstq = new List<Tuple<string, double>>();

    // Except() is a set difference, so duplicate tokens collapse on this path.
    if (sw != null && sw.Lst != null)
    {
        tokens = tokens.Except(sw.Lst.ToArray());
    }

    var terms = tokens
        .OrderBy(a => a)
        .Select(token => stemmer.Stem(token))
        .ToList();

    // Single grouping pass instead of re-scanning (and re-copying) the whole
    // list for every distinct term. GroupBy yields groups in first-occurrence
    // order, which matches the order Distinct() would give.
    var total = terms.Count;
    foreach (var group in terms.GroupBy(term => term))
    {
        Lstq.Add(new Tuple<string, double>(group.Key, (double)group.Count() / total));
    }
}