//Get StopWord
        /// <summary>
        /// Prompts the user for a stop-word file via <c>openFileDialog2</c>, reads it as UTF-8,
        /// builds a new <c>Stopword</c> from its text and updates the status label and the
        /// <c>_sword</c> flag to reflect whether the file had any content.
        /// </summary>
        /// <param name="sender">Control that raised the click event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button8_Click(object sender, EventArgs e)
        {
            if (openFileDialog2.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            // Read the selected file exactly once. The previous version wrapped this in a loop
            // over openFileDialog1.FileNames (a different dialog), so the same file was re-read
            // once per document selected there - and not read at all when none were selected.
            string content;
            using (var reader = new StreamReader(openFileDialog2.FileName, Encoding.UTF8))
            {
                content = reader.ReadToEnd();
            }

            // The Stopword instance is replaced even for an empty file; _sword records
            // whether the loaded text was non-empty.
            Stopword = new Stopword(content);

            if (content != "")
            {
                lbTxt.Text = @"đã có stopword";
                _sword     = true;
            }
            else
            {
                MessageBox.Show(@"File rổng chọn lại", @"Select File Stopword", MessageBoxButtons.OK,
                                MessageBoxIcon.Stop);
                _sword     = false;
                lbTxt.Text = @"chưa có stopword";
            }
        }
Example #2
0
        /// <summary>
        /// Builds a document from its title and raw text: stores the lower-cased text,
        /// counts the regex tokens, and fills <c>ListWorld</c> with the stemmed tokens
        /// that are not in the stop-word list.
        /// </summary>
        /// <param name="title">Document title, stored as-is.</param>
        /// <param name="content">Raw document text; tokens are matched against this original (not lower-cased) text.</param>
        /// <param name="st">Stop-word source; its <c>Lst</c> is copied into <c>StopWords</c>.</param>
        /// <param name="rg">Token regex pattern, or <c>null</c> to use <c>[A-Za-z\-]+</c>.</param>
        /// <param name="s">Not used by this constructor.</param>
        public document(string title, string content, Stopword st, string rg, int s)
        {
            Title   = title;
            Content = content.ToLower();

            IStemmer stemmer = new EnglishStemmer();
            var      pattern = rg ?? @"[A-Za-z\-]+";
            var      matches = Regex.Matches(content, pattern);

            // Total number of tokens, before stop-word removal.
            count     = matches.Count;
            StopWords = st.Lst.ToArray();

            // Lower-case, drop stop words (Except also removes duplicate tokens),
            // sort, and only then stem - so the final list follows the order of the
            // unstemmed words.
            ListWorld = matches.Cast <Match>()
                        .Select(m => m.Value.ToLower())
                        .Except(StopWords)
                        .OrderBy(word => word)
                        .Select(word => stemmer.Stem(word.ToLower()))
                        .ToList();
        }
        /// <summary>
        /// Creates the form: runs the designer initialisation, allocates the data table,
        /// clears the stop-word reference and configures the multi-select mode of two
        /// of the file dialogs.
        /// </summary>
        public Form1()
        {
            // Must run first: it creates the dialog components configured below.
            InitializeComponent();

            // Start with no stop-word list and an empty table.
            Stopword = null;
            DTable   = new DataTable();

            // Dialog 1 accepts several files at once; dialog 5 accepts a single file.
            openFileDialog5.Multiselect = false;
            openFileDialog1.Multiselect = true;
        }
        /// <summary>
        /// Builds a query from a tab-separated line of the form <c>id&lt;TAB&gt;title</c>.
        /// The title is tokenised with a regex, stop words are removed when a list is
        /// available, the remaining tokens are sorted and stemmed, and <c>Lstq</c> receives
        /// one (term, weight) tuple per distinct stem.
        /// </summary>
        /// <param name="s">Input line; must split into exactly two tab-separated fields,
        /// otherwise the constructor returns early leaving only an empty <c>Lstq</c>.</param>
        /// <param name="sw">Stop-word source; may be <c>null</c> (or have a <c>null</c> list),
        /// in which case no stop words are removed.</param>
        /// <param name="e">Not used by this constructor.</param>
        /// <param name="rg">Token regex pattern, or <c>null</c> to use <c>[A-Za-z\-]+</c>.</param>
        public Query(string s, Stopword sw, int e, string rg)
        {
            Lstq = new List <Tuple <string, double> >();

            var fields = s.Split('\t');
            if (fields.Length != 2)
            {
                return;
            }

            IdQuery = fields[0];
            Title   = fields[1];

            var      pattern = rg ?? @"[A-Za-z\-]+";
            IStemmer stemmer = new EnglishStemmer();

            IEnumerable <string> words = Regex.Matches(fields[1], pattern)
                                         .Cast <Match>()
                                         .Select(match => match.Value);

            // Explicit null check instead of the former catch-all: a missing stop-word
            // list is an expected situation, not an exceptional one, and a blanket
            // catch (Exception) would also hide unrelated failures.
            // NOTE(review): tokens are compared to the stop words before lower-casing,
            // so capitalised stop words in the title are kept - confirm this is intended.
            if (sw != null && sw.Lst != null)
            {
                // Except has set semantics, so duplicate tokens are collapsed here too.
                words = words.Except(sw.Lst.ToArray());
            }

            var stems = words.OrderBy(word => word)
                        .Select(word => stemmer.Stem(word.ToLower()))
                        .ToList();

            Dictionary <string, int> counts = stems.GroupBy(stem => stem)
                                              .ToDictionary(g => g.Key,
                                                            g => g.Count());

            // Weight = occurrences of the stem divided by the number of distinct stems
            // (the dictionary has exactly one key per distinct stem).
            var distinctCount = counts.Count;

            foreach (var item in counts)
            {
                Lstq.Add(new Tuple <string, double>(item.Key, (double)item.Value / distinctCount));
            }
        }
        /// <summary>
        /// Parses a boolean (AND / OR / NOT) query. The text is tokenised, stop words are
        /// filtered out when a list is supplied, and every token is lower-cased and stemmed.
        /// Each operand (anything other than "and", "or", "not", "(" or ")") is then
        /// numbered in order of appearance: it is recorded in <c>lst</c> as (term, index),
        /// appended to <c>Lst1</c>, and replaced in <c>Lst</c> by its index as a string.
        /// </summary>
        /// <param name="s">Query text.</param>
        /// <param name="sw">Stop-word source, or <c>null</c> to skip stop-word removal.
        /// The entries "AND", "OR" and "NOT" in the list are ignored so that operators survive.</param>
        public QueryAON(string s, Stopword sw)
        {
            lst  = new List <Tuple <string, int> >();
            Lst1 = new List <string>();

            const string regex   = @"[A-Za-z\-()]+";
            IStemmer     stemmer = new EnglishStemmer();

            IEnumerable <string> tokens = Regex.Matches(s, regex)
                                          .Cast <Match>()
                                          .Select(match => match.Value);

            if (sw != null)
            {
                // Filter by membership rather than Enumerable.Except: Except is a set
                // operation and would also drop repeated tokens (for example the second
                // "AND" in "a AND b AND c"), corrupting the expression. Filtering keeps
                // the token sequence intact, matching the behaviour when sw is null.
                var stopWords = new HashSet <string>(sw.Lst.Where(a => a != "AND" && a != "OR" && a != "NOT"));
                tokens = tokens.Where(token => !stopWords.Contains(token));
            }

            Lst = tokens.Select(token => stemmer.Stem(token.ToLower())).ToList();

            // Running index assigned to operands in order of appearance.
            var i = 0;

            for (var j = 0; j < Lst.Count; j++)
            {
                var token = Lst[j];

                // Operators and stand-alone parentheses stay in place untouched.
                if (token == "and" || token == "or" || token == "not" || token == "(" || token == ")")
                {
                    continue;
                }

                lst.Add(new Tuple <string, int>(token, i));
                Lst1.Add(token);
                Lst[j] = i.ToString();
                i++;
            }
        }
        /// <summary>
        /// Builds a query from free text: the text is lower-cased, tokenised with a regex,
        /// stripped of stop words when a list is supplied, sorted and stemmed.
        /// <c>Lstq</c> receives one (term, weight) tuple per distinct stem, where the
        /// weight is the stem's occurrence count divided by the total number of stems.
        /// </summary>
        /// <param name="s">Query text.</param>
        /// <param name="sw">Stop-word source, or <c>null</c> to skip stop-word removal.</param>
        /// <param name="rg">Token regex pattern, or <c>null</c> to use <c>[A-Za-z\-]+</c>.</param>
        public Query(string s, Stopword sw, string rg)
        {
            var      pattern = rg ?? @"[A-Za-z\-]+";
            IStemmer stemmer = new EnglishStemmer();

            s = s.ToLower();
            IEnumerable <string> words = Regex.Matches(s, pattern)
                                         .Cast <Match>()
                                         .Select(match => match.Value);

            Lstq = new List <Tuple <string, double> >();

            if (sw != null)
            {
                // Except has set semantics: besides removing stop words it also
                // collapses duplicate tokens.
                words = words.Except(sw.Lst.ToArray());
            }

            var stems = words.OrderBy(word => word)
                        .Select(word => stemmer.Stem(word.ToLower()))
                        .ToList();

            // Count every stem in a single pass instead of rescanning the whole list
            // once per distinct stem; GroupBy yields groups in first-occurrence order,
            // the same order Distinct() produced before.
            var total = stems.Count;

            foreach (var group in stems.GroupBy(stem => stem))
            {
                Lstq.Add(new Tuple <string, double>(group.Key, (double)group.Count() / total));
            }
        }