//public ActionResult LoadVocabulary(string idgc, string typew)
        //{
        //    //var jarRoot = @"D:\HsnSky\NLPTokenzie\stanford-corenlp-full-2015-12-09\edu\stanford\nlp";
        //    string urlRoot = System.IO.Path.Combine(Server.MapPath("~/Uploads/english-left3words"), "english-left3words-distsim.tagger");
        //    //var jarRoot = @"\stanford-corenlp-full-2015-12-09\edu\stanford\nlp";
        //    //var modelsDirectory = urlRoot + jarRoot + @"\models\pos-tagger\english-left3words";

        //    // Loading POS Tagger
        //    //var tagger = new MaxentTagger(urlRoot + @"\english-left3words-distsim.tagger");
        //    var tagger = new MaxentTagger(urlRoot);
        //    // Text for tagging
        //    var data = db.Sentenses.Where(e=>e.Comment.GroupCommentId==idgc).ToList();
        //    //var data = db.Comments.Where(e => e.GroupCommentId == idgc).ToList();

        //    foreach (var item in data)
        //    {
        //        var text = item.ContentReview;
        //        var sen = db.Sentenses.Find(item.Id);
        //        string str = "";
        //        //var text = "Quality hotel at great price Very clean. Free breakfast with good selection. Staff friendly and most helpful. A grat stay!";

        //        var sentences = MaxentTagger.tokenizeText(new StringReader(text)).toArray();
        //        foreach (ArrayList sentence in sentences)
        //        {
        //            try
        //            {
        //                var taggedSentence = tagger.tagSentence(sentence);
        //                string[] str1 = taggedSentence.ToString().Split(',', '[', ']');
        //                if (typew == "2")
        //                {
        //                    #region Tách có tạo từ ghép
        //                    //Kiểm tra từ ghép
        //                    for (int i = 0; i < str1.Length; i++)
        //                    {
        //                        try
        //                        {
        //                            string[] str2 = str1[i].ToString().Split('/');
        //                            string[] str3 = str1[i + 1].ToString().Split('/');
        //                            string str4 = "";
        //                            string type = "JJ";

        //                            if (str2[1].Trim() == "JJ" && (str3[1].Trim() == "NN" || str3[1].Trim() == "NNS")) //Luật 1
        //                            {
        //                                str4 = str2[0].Trim() + " " + str3[0].Trim();
        //                                str += str2[0].Trim() + "|" + str2[1].Trim() + " ;" + str3[0].Trim() + "|" + str3[1].Trim() + ";";
        //                            }
        //                            else if ((str2[1].Trim() == "RB" || str2[1].Trim() == "RBR" || str2[1].Trim() == "RBS") && str3[1].Trim() == "JJ") //Luật 2
        //                            {
        //                                str4 = str2[0].Trim() + " " + str3[0].Trim();
        //                                str += str2[0].Trim() + "|" + str2[1].Trim() + " ;" + str3[0].Trim() + "|" + str3[1].Trim() + ";";
        //                            }
        //                            else if (str2[1].Trim() == "JJ" && str3[1].Trim() == "JJ") //Luật 3
        //                            {
        //                                str4 = str2[0].Trim() + " " + str3[0].Trim();
        //                                str += str2[0].Trim() + "|" + str2[1].Trim() + " ;" + str3[0].Trim() + "|" + str3[1].Trim() + ";";
        //                            }
        //                            else if ((str2[1].Trim() == "NN" || str2[1].Trim() == "NNS") && str3[1].Trim() == "JJ") //Luật 4
        //                            {
        //                                str4 = str2[0].Trim() + " " + str3[0].Trim();
        //                                str += str2[0].Trim() + "|" + str2[1].Trim() + " ;" + str3[0].Trim() + "|" + str3[1].Trim() + ";";
        //                            }
        //                            else if ((str2[1].Trim() == "RB" || str2[1].Trim() == "RBR" || str2[1].Trim() == "RBS")
        //                                && (str3[1].Trim() == "VB" || str3[1].Trim() == "VBD" || str3[1].Trim() == "VBN" || str3[1].Trim() == "VBG")) //Luật 5
        //                            {
        //                                str4 = str2[0].Trim() + " " + str3[0].Trim();
        //                                str += str2[0].Trim() + "|" + str2[1].Trim() + " ;" + str3[0].Trim() + "|" + str3[1].Trim() + ";";
        //                            }
        //                            else if (str2[1].Trim() == "JJ" || str2[1].Trim() == "NN" || str2[1].Trim() == "VB" || str2[1].Trim() == "RB"  //Luật 6
        //                                        || str2[1].Trim() == "VBZ" || str2[1].Trim() == "NNP" || str2[1].Trim() == "NNS")
        //                            {
        //                                str4 = str2[0].Trim();
        //                                type = str2[1].Trim();
        //                                str += str2[0].Trim() + "|" + str2[1].Trim() + " ;";
        //                            }

        //                            //cập nhật từ điển
        //                            if (str4.Trim().Count() > 1)
        //                            {
        //                                var data2 = db.Vocabularies.Where(e => e.Word.Trim().ToLower().Equals(str4) && e.GroupCommentId == idgc).ToList();
        //                                if (data2.Count <= 0)
        //                                {
        //                                    Vocabulary data3 = new Vocabulary();
        //                                    string idv = Public.GetID();
        //                                    while (db.Vocabularies.Where(e => e.Id == idv).Count() > 0)
        //                                    {
        //                                        idv = Public.GetID();
        //                                    }
        //                                    data3.Id = idv;
        //                                    data3.Word = str4.Trim();
        //                                    data3.TypeWord = type.Trim();
        //                                    data3.GroupCommentId = idgc;
        //                                    db.Vocabularies.Add(data3);
        //                                    db.SaveChanges();
        //                                }
        //                            }
        //                        }
        //                        catch { }
        //                    }
        //                    #endregion
        //                }
        //                else
        //                {
        //                    #region Tách không tạo từ ghép
        //                    foreach (var item2 in str1)
        //                    {
        //                        try
        //                        {
        //                            if (item2.Trim() != "")
        //                            {
        //                                string[] str2 = item2.ToString().Split('/');
        //                                if (str2[1].Trim() == "JJ" || str2[1].Trim() == "NN" || str2[1].Trim() == "VB" || str2[1].Trim() == "RB"
        //                                    || str2[1].Trim() == "VBZ" || str2[1].Trim() == "NNP" || str2[1].Trim() == "NNS")
        //                                {
        //                                    string str3 = str2[0].Trim().ToLower();
        //                                    //logs tách từ
        //                                    str += str2[0].Trim() + "|" + str2[1].Trim() + " ;";

        //                                    //cập nhật từ điển
        //                                    if (str2[0].Trim().Count() > 1)
        //                                    {
        //                                        var data2 = db.Vocabularies.Where(e => e.Word.Trim().ToLower().Equals(str3) && e.GroupCommentId == idgc).ToList();
        //                                        if (data2.Count <= 0)
        //                                        {
        //                                            Vocabulary data4 = new Vocabulary();
        //                                            string idv = Public.GetID();
        //                                            while (db.Vocabularies.Where(e => e.Id == idv).Count() > 0)
        //                                            {
        //                                                idv = Public.GetID();
        //                                            }
        //                                            data4.Id = idv;
        //                                            data4.Word = str2[0].Trim();
        //                                            data4.TypeWord = str2[1].Trim();
        //                                            data4.GroupCommentId = idgc;
        //                                            db.Vocabularies.Add(data4);
        //                                            db.SaveChanges();
        //                                        }
        //                                    }
        //                                }
        //                            }
        //                        }
        //                        catch { }
        //                    }
        //                    #endregion
        //                }
        //            }
        //            catch { }
        //        }

        //        sen.Logs = str;
        //        db.Entry(sen).State = EntityState.Modified;
        //        db.SaveChanges();
        //    }

        //    CountVocabulary(idgc);

        //    //IEnumerable<GroupComent> data6 = (IEnumerable<GroupComent>)db.GroupComents.ToList();
        //    //ViewBag.GroupComent = new SelectList(data, "Id", "Name", idgc);
        //    var data5 = db.Vocabularies.Where(e => e.GroupCommentId == idgc).ToList();
        //    return PartialView("_ListVocabulary", data5);
        //}

        /// <summary>
        /// Extracts vocabulary from the comments of every comment group whose
        /// <c>ProductId</c> is "170319111826335", inserts or increments the matching
        /// <c>Vocabulary_1</c> rows, and renders the <c>_ListVocabulary</c> partial view.
        /// </summary>
        /// <remarks>
        /// Each comment is split into sentences and POS-tagged with <c>MaxentTagger</c>.
        /// The extraction mode is fixed by the local <c>typew</c>:
        /// "2" also builds two-word compounds from adjacent tags, any other value
        /// (currently always "1") stores single lower-cased words, skips stop words read
        /// from a hard-coded text file, and saves one <c>Sentensesnotword</c> row per
        /// tagged sentence. A failure on one token or sentence is traced and skipped so
        /// the remaining data is still processed. The elapsed time in milliseconds is
        /// exposed through <c>ViewBag.Time</c>.
        /// </remarks>
        /// <returns>The <c>_ListVocabulary</c> partial view bound to all <c>Vocabulary_1</c> rows.</returns>
        public ActionResult LoadVocabulary()
        {
            // NOTE(review): this literal is used both as the ProductId filter and as the
            // GroupCommentId written to vocabulary rows in single-word mode - confirm intended.
            const string targetId = "170319111826335";

            // POS tags whose words are kept as vocabulary entries (same set in both modes).
            string[] vocabularyTags =
            {
                "JJ", "NN", "RB", "RBR", "RBS", "VBZ", "VBD", "VBN", "VBG", "VB", "NNP", "NNS"
            };

            string    idgc  = "";
            string    typew = "1"; // "2" = build compounds as well; anything else = single words only
            Stopwatch sw    = Stopwatch.StartNew();

            // Load the stop-word list. The using blocks release the file handle even when
            // reading fails, and the indexer tolerates duplicate lines (Add would throw).
            Dictionary <string, string> stopword = new Dictionary <string, string>();
            using (FileStream fs = new FileStream("D:\\hoctap\\DoAnTotNghiep\\soucecode\\stopWord_1.txt", FileMode.Open, FileAccess.Read))
            using (StreamReader rd = new StreamReader(fs, Encoding.UTF8))
            {
                string line;
                while ((line = rd.ReadLine()) != null)
                {
                    stopword[line] = line;
                }
            }

            string urlRoot = System.IO.Path.Combine(Server.MapPath("~/Uploads/english-left3words"), "english-left3words-distsim.tagger");
            var    tagger  = new MaxentTagger(urlRoot);

            var dataGC = db.GroupComents.Where(e => e.ProductId == targetId).ToList();

            foreach (var itemGC in dataGC)
            {
                idgc = itemGC.Id;
                var data = db.Comments.Where(e => e.GroupCommentId == idgc).ToList();

                foreach (var item in data)
                {
                    var text      = item.Comment1;
                    var sentences = MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray();
                    foreach (ArrayList sentence in sentences)
                    {
                        try
                        {
                            // The tagged sentence prints as "[word/TAG, word/TAG, ...]";
                            // splitting on ',', '[' and ']' yields one "word/TAG" piece per token
                            // plus blank pieces at both ends.
                            var      taggedSentence = tagger.tagSentence(sentence);
                            string[] str1           = taggedSentence.ToString().Split(',', '[', ']');
                            if (typew == "2")
                            {
                                #region Extraction with compound words
                                for (int i = 0; i < str1.Length; i++)
                                {
                                    try
                                    {
                                        string[] str2 = str1[i].Split('/');
                                        if (str2.Length < 2)
                                        {
                                            // Blank or untagged piece: nothing to classify.
                                            continue;
                                        }

                                        // Look ahead safely: the last real token has no tagged
                                        // successor, and must still be eligible as a single word.
                                        string[] str3     = i + 1 < str1.Length ? str1[i + 1].Split('/') : new string[0];
                                        string   word     = str2[0].Trim();
                                        string   tag      = str2[1].Trim();
                                        string   nextWord = str3.Length > 0 ? str3[0].Trim() : "";
                                        string   nextTag  = str3.Length > 1 ? str3[1].Trim() : "";

                                        bool isAdverb   = tag == "RB" || tag == "RBR" || tag == "RBS";
                                        bool isNoun     = tag == "NN" || tag == "NNS";
                                        bool nextIsNoun = nextTag == "NN" || nextTag == "NNS";
                                        bool nextIsVerb = nextTag == "VB" || nextTag == "VBD" || nextTag == "VBN" || nextTag == "VBG";

                                        string str4 = "";
                                        string type = "JJ"; // compounds are always stored with type "JJ"

                                        if ((tag == "JJ" && (nextIsNoun || nextTag == "JJ")) ||   // rules 1 and 3
                                            (isAdverb && (nextTag == "JJ" || nextIsVerb)) ||      // rules 2 and 5
                                            (isNoun && nextTag == "JJ"))                          // rule 4
                                        {
                                            str4 = word + " " + nextWord;
                                        }
                                        else if (Array.IndexOf(vocabularyTags, tag) >= 0)         // rule 6: single word
                                        {
                                            str4 = word;
                                            type = tag;
                                        }

                                        // Update the dictionary (entries of one character are ignored).
                                        if (str4.Trim().Length > 1)
                                        {
                                            // Lower-case the lookup value so it matches the lower-cased
                                            // column regardless of the database collation.
                                            string lookup   = str4.Trim().ToLower();
                                            var    existing = db.Vocabulary_1.Where(e => e.Word.Trim().ToLower().Equals(lookup) && e.GroupCommentId == idgc).FirstOrDefault();
                                            if (existing == null)
                                            {
                                                // Regenerate the id until it is unused.
                                                string idv = Public.GetID();
                                                while (db.Vocabulary_1.Any(e => e.Id == idv))
                                                {
                                                    idv = Public.GetID();
                                                }

                                                Vocabulary_1 data3 = new Vocabulary_1();
                                                data3.Id             = idv;
                                                data3.Word           = str4.Trim();
                                                data3.TypeWord       = type.Trim();
                                                data3.GroupCommentId = idgc;
                                                data3.Counts         = 1; // first occurrence, as in single-word mode
                                                data3.Type           = 2;
                                                db.Vocabulary_1.Add(data3);
                                                db.SaveChanges();
                                            }
                                            else
                                            {
                                                existing.Counts++;
                                                db.Entry(existing).State = EntityState.Modified;
                                                db.SaveChanges();
                                            }
                                        }
                                    }
                                    catch (System.Exception ex)
                                    {
                                        // Best effort: skip this token but leave a trace of the failure.
                                        System.Diagnostics.Trace.TraceWarning("LoadVocabulary: token skipped: " + ex);
                                    }
                                }
                                #endregion
                            }
                            else
                            {
                                idgc = targetId;
                                #region Extraction without compound words
                                string se = ""; // sentence rebuilt from the kept, non-stop words
                                foreach (var item2 in str1)
                                {
                                    try
                                    {
                                        if (item2.Trim() == "")
                                        {
                                            continue;
                                        }

                                        string[] str2 = item2.Split('/');
                                        if (str2.Length < 2)
                                        {
                                            // Untagged piece: nothing to classify.
                                            continue;
                                        }

                                        string typeword = str2[1].Trim();
                                        if (Array.IndexOf(vocabularyTags, typeword) < 0)
                                        {
                                            continue;
                                        }

                                        string str3 = str2[0].Trim().ToLower();
                                        if (stopword.ContainsKey(str3))
                                        {
                                            continue;
                                        }

                                        se = se == "" ? str3 : se + " " + str3;

                                        // Update the dictionary (words of one character are ignored).
                                        if (str3.Length > 1)
                                        {
                                            // TypeWord is stored exactly as the tagger emits it, so it is
                                            // compared as-is instead of lower-casing only one side.
                                            var existing = db.Vocabulary_1.Where(e => e.Word.Trim().ToLower().Equals(str3) && e.TypeWord.Trim().Equals(typeword) && e.GroupCommentId == idgc).FirstOrDefault();
                                            if (existing == null)
                                            {
                                                // Regenerate the id until it is unused.
                                                string idv = Public.GetID();
                                                while (db.Vocabulary_1.Any(e => e.Id == idv))
                                                {
                                                    idv = Public.GetID();
                                                }

                                                Vocabulary_1 data4 = new Vocabulary_1();
                                                data4.Id             = idv;
                                                data4.Word           = str3;
                                                data4.TypeWord       = typeword;
                                                data4.GroupCommentId = idgc;
                                                data4.Counts         = 1;
                                                data4.Type           = 1;
                                                db.Vocabulary_1.Add(data4);
                                                db.SaveChanges();
                                            }
                                            else
                                            {
                                                existing.Counts++;
                                                db.Entry(existing).State = EntityState.Modified;
                                                db.SaveChanges();
                                            }
                                        }
                                    }
                                    catch (System.Exception ex)
                                    {
                                        // Best effort: skip this token but leave a trace of the failure.
                                        System.Diagnostics.Trace.TraceWarning("LoadVocabulary: token skipped: " + ex);
                                    }
                                }

                                // Store the sentence with stop words removed (saved even when empty).
                                Sentensesnotword senotopword = new Sentensesnotword();
                                senotopword.Id            = Public.GetID();
                                senotopword.ContentReview = se.Trim();
                                senotopword.CommentId     = item.Id;
                                db.Sentensesnotwords.Add(senotopword);
                                db.SaveChanges();
                                #endregion
                            }
                        }
                        catch (System.Exception ex)
                        {
                            // Best effort: skip this sentence but leave a trace of the failure.
                            System.Diagnostics.Trace.TraceWarning("LoadVocabulary: sentence skipped: " + ex);
                        }
                    }
                }
            }

            sw.Stop();
            ViewBag.Time = sw.ElapsedMilliseconds;
            var data5 = db.Vocabulary_1.ToList();
            return PartialView("_ListVocabulary", data5);
        }
        // Example #2
        // 0
        /// <summary>
        /// Builds a reduced version of every row in <c>db.Sentenses</c> and stores it in
        /// <c>db.Sentensesnotwords</c>.
        /// </summary>
        /// <remarks>
        /// Each sentence is POS-tagged with <c>MaxentTagger</c>. A token is kept (lower-cased)
        /// when it is tagged NN, NNS or NNP, or when it is listed in <c>db.CoreWords</c>,
        /// and it is not listed in <c>db.StopWords</c>. The reduced text is saved under the
        /// source row's own <c>Id</c>, and only when no <c>Sentensesnotword</c> with that id
        /// exists yet; consequently, if one row tokenizes into several sentences, only the
        /// first non-empty result is stored.
        /// </remarks>
        private void standarsetenword()
        {
            // The indexer is used instead of Add so duplicate entries do not throw.
            Dictionary <string, string> dicstopword = new Dictionary <string, string>();
            foreach (var kvp in db.StopWords.ToList())
            {
                dicstopword[kvp.StopWord1] = kvp.StopWord1;
            }

            // core_word holds a comma-separated list; every element becomes a key.
            // NOTE(review): keys are stored as-is while lookups below use Trim().ToLower(),
            // so entries with surrounding spaces or upper-case letters never match - confirm
            // the stored data is already normalised.
            Dictionary <string, string> diccore = new Dictionary <string, string>();
            foreach (var kvp in db.CoreWords.ToList())
            {
                foreach (var s in kvp.core_word.Split(','))
                {
                    diccore[s] = s;
                }
            }

            string urlRoot = System.IO.Path.Combine(Server.MapPath("~/Uploads/english-left3words"), "english-left3words-distsim.tagger");
            var    tagger  = new MaxentTagger(urlRoot);

            // Iterating the list directly also covers the single-row case, which the former
            // "Count > 1" guard skipped.
            foreach (var item in db.Sentenses.ToList())
            {
                string ids       = item.Id;
                var    sentences = MaxentTagger.tokenizeText(new java.io.StringReader(item.ContentReview)).toArray();
                foreach (ArrayList sentence in sentences)
                {
                    // The tagged sentence prints as "[word/TAG, word/TAG, ...]".
                    var      taggedSentence = tagger.tagSentence(sentence);
                    string[] str1           = taggedSentence.ToString().Split(',', '[', ']');
                    string   strtmp         = "";
                    foreach (var item2 in str1)
                    {
                        if (item2.Trim() == "")
                        {
                            continue;
                        }

                        // Only well-formed "word/TAG" pieces with a non-blank tag are considered.
                        string[] str2 = item2.Split('/');
                        if (str2.Length != 2 || str2[1].Trim() == "")
                        {
                            continue;
                        }

                        string tag    = str2[1].Trim();
                        string word   = str2[0].Trim().ToLower();
                        bool   isNoun = tag == "NN" || tag == "NNP" || tag == "NNS";

                        if ((isNoun || diccore.ContainsKey(word)) && !dicstopword.ContainsKey(word))
                        {
                            strtmp += " " + word;
                        }
                    }

                    // Persist only non-empty results whose id is not already taken.
                    if (strtmp.Trim() != "" && !db.Sentensesnotwords.Any(e => e.Id == ids))
                    {
                        Sentensesnotword senotopword = new Sentensesnotword();
                        senotopword.Id            = ids;
                        senotopword.ContentReview = strtmp.Trim();
                        senotopword.CommentId     = item.CommentId;
                        db.Sentensesnotwords.Add(senotopword);
                        db.SaveChanges();
                    }
                }
            }
        }