// NOTE: an older, fully commented-out variant, LoadVocabulary(string idgc, string typew),
// which wrote to db.Vocabularies / db.Sentenses, used to sit above this method.
// It was dead code and has been removed.

// Penn Treebank tags whose words are stored as single-word vocabulary entries.
private static readonly HashSet<string> VocabularyWordTags = new HashSet<string>(StringComparer.Ordinal)
{
    "JJ", "NN", "RB", "RBR", "RBS", "VBN", "VBZ", "VBD", "VBG", "VB", "NNP", "NNS"
};

/// <summary>
/// POS-tags every comment that belongs to the hard-coded product, updates the
/// <c>Vocabulary_1</c> table with the words found and, in single-word mode, stores one
/// stop-word-free <c>Sentensesnotword</c> row per tagged sentence.
/// </summary>
/// <returns>
/// The <c>_ListVocabulary</c> partial view over all <c>Vocabulary_1</c> rows;
/// <c>ViewBag.Time</c> holds the elapsed processing time in milliseconds.
/// </returns>
public ActionResult LoadVocabulary()
{
    const string productId = "170319111826335";
    const string stopWordPath = "D:\\hoctap\\DoAnTotNghiep\\soucecode\\stopWord_1.txt";

    // "1" = single words only (also writes the stop-word-free sentence),
    // "2" = additionally build two-word compounds. Currently fixed to "1".
    string typew = "1";
    bool buildCompounds = typew == "2";

    Stopwatch sw = Stopwatch.StartNew();

    HashSet<string> stopWords = ReadStopWordFile(stopWordPath);

    string taggerPath = System.IO.Path.Combine(Server.MapPath("~/Uploads/english-left3words"), "english-left3words-distsim.tagger");
    var tagger = new MaxentTagger(taggerPath);

    var commentGroups = db.GroupComents.Where(e => e.ProductId == productId).ToList();
    foreach (var commentGroup in commentGroups)
    {
        string groupId = commentGroup.Id;

        // NOTE(review): single-word mode has always stored vocabulary under the product id
        // rather than the comment-group id (the original overwrote idgc inside the loop).
        // Kept as-is so existing rows keep matching - confirm whether this is intended.
        string vocabularyGroupId = buildCompounds ? groupId : productId;

        var comments = db.Comments.Where(e => e.GroupCommentId == groupId).ToList();
        foreach (var comment in comments)
        {
            string text = comment.Comment1;
            if (string.IsNullOrWhiteSpace(text))
            {
                // Nothing to tag; a null text would otherwise crash java.io.StringReader.
                continue;
            }

            var sentences = MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray();
            foreach (ArrayList sentence in sentences)
            {
                try
                {
                    var taggedSentence = tagger.tagSentence(sentence);

                    // The tagged sentence prints as "[word/TAG, word/TAG, ...]".
                    string[] taggedTokens = taggedSentence.ToString().Split(',', '[', ']');

                    if (buildCompounds)
                    {
                        StoreCompoundVocabulary(taggedTokens, vocabularyGroupId);
                    }
                    else
                    {
                        string reducedSentence = StoreSingleWordVocabulary(taggedTokens, stopWords, vocabularyGroupId);

                        // Store the sentence with stop words removed.
                        Sentensesnotword senotopword = new Sentensesnotword();
                        senotopword.Id = Public.GetID();
                        senotopword.ContentReview = reducedSentence;
                        senotopword.CommentId = comment.Id;
                        db.Sentensesnotwords.Add(senotopword);
                        db.SaveChanges();
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: one bad sentence must not abort the whole run, but leave a trace.
                    System.Diagnostics.Trace.TraceWarning("LoadVocabulary: sentence of comment {0} skipped: {1}", comment.Id, ex.Message);
                }
            }
        }
    }

    sw.Stop();
    ViewBag.Time = sw.ElapsedMilliseconds;

    var vocabulary = db.Vocabulary_1.ToList();
    return PartialView("_ListVocabulary", vocabulary);
}

// Reads the stop-word file (UTF-8, one word per line) into a set; duplicate and blank lines are ignored.
private static HashSet<string> ReadStopWordFile(string path)
{
    var words = new HashSet<string>();
    using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var reader = new StreamReader(stream, Encoding.UTF8))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Lookups use trimmed tokens, so surrounding whitespace could never match.
            string word = line.Trim();
            if (word.Length > 0)
            {
                words.Add(word);
            }
        }
    }
    return words;
}

// Splits a "word/TAG" token into trimmed word and tag; returns false when the token has no tag part.
private static bool TrySplitTaggedToken(string token, out string word, out string tag)
{
    word = "";
    tag = "";
    if (token == null)
    {
        return false;
    }

    string[] parts = token.Split('/');
    if (parts.Length < 2)
    {
        return false;
    }

    word = parts[0].Trim();
    tag = parts[1].Trim();
    return true;
}

// Single-word mode: records every non-stop-word token with a vocabulary tag and
// returns the sentence rebuilt from the lower-cased tokens that were kept.
private string StoreSingleWordVocabulary(string[] taggedTokens, HashSet<string> stopWords, string groupCommentId)
{
    var keptWords = new List<string>();
    foreach (string token in taggedTokens)
    {
        string rawWord;
        string tag;
        if (!TrySplitTaggedToken(token, out rawWord, out tag) || !VocabularyWordTags.Contains(tag))
        {
            continue;
        }

        string word = rawWord.ToLower();
        if (stopWords.Contains(word))
        {
            continue;
        }

        keptWords.Add(word);

        // Update the dictionary; one-character tokens are not stored.
        if (rawWord.Length > 1)
        {
            try
            {
                AddOrCountVocabulary(word, tag, groupCommentId, false);
            }
            catch (Exception ex)
            {
                // Best effort per word, as before, but no longer silent.
                System.Diagnostics.Trace.TraceWarning("LoadVocabulary: word '{0}' not stored: {1}", word, ex.Message);
            }
        }
    }
    return string.Join(" ", keptWords).Trim();
}

// Compound mode: pairs each token with its successor according to rules 1-5,
// falling back to the single word (rule 6) when no pair rule applies.
private void StoreCompoundVocabulary(string[] taggedTokens, string groupCommentId)
{
    for (int i = 0; i < taggedTokens.Length; i++)
    {
        string word;
        string tag;
        if (!TrySplitTaggedToken(taggedTokens[i], out word, out tag))
        {
            continue;
        }

        // The last token has no successor; it used to be lost to a swallowed
        // IndexOutOfRangeException, now it simply falls through to the single-word rule.
        string nextWord = "";
        string nextTag = "";
        if (i + 1 < taggedTokens.Length)
        {
            TrySplitTaggedToken(taggedTokens[i + 1], out nextWord, out nextTag);
        }

        bool isAdjective = tag == "JJ";
        bool isNoun = tag == "NN" || tag == "NNS";
        bool isAdverb = tag == "RB" || tag == "RBR" || tag == "RBS";
        bool nextIsAdjective = nextTag == "JJ";
        bool nextIsNoun = nextTag == "NN" || nextTag == "NNS";
        bool nextIsVerb = nextTag == "VB" || nextTag == "VBD" || nextTag == "VBN" || nextTag == "VBG";

        bool formsCompound =
            (isAdjective && nextIsNoun)          // Rule 1
            || (isAdverb && nextIsAdjective)     // Rule 2
            || (isAdjective && nextIsAdjective)  // Rule 3
            || (isNoun && nextIsAdjective)       // Rule 4
            || (isAdverb && nextIsVerb);         // Rule 5

        string entryWord = "";
        string entryType = "JJ"; // compounds are always typed "JJ"
        if (formsCompound)
        {
            entryWord = word + " " + nextWord;
        }
        else if (VocabularyWordTags.Contains(tag)) // Rule 6
        {
            entryWord = word;
            entryType = tag;
        }

        // Update the dictionary.
        entryWord = entryWord.Trim();
        if (entryWord.Length > 1)
        {
            try
            {
                AddOrCountVocabulary(entryWord, entryType, groupCommentId, true);
            }
            catch (Exception ex)
            {
                System.Diagnostics.Trace.TraceWarning("LoadVocabulary: word '{0}' not stored: {1}", entryWord, ex.Message);
            }
        }
    }
}

// Inserts a new Vocabulary_1 row (Counts = 1) or increments Counts on the existing one.
// Single-word entries (Type 1) are matched on word + tag + group,
// compound-mode entries (Type 2) on word + group only.
private void AddOrCountVocabulary(string word, string typeWord, string groupCommentId, bool compoundMode)
{
    // The stored columns are lower-cased in the query, so the keys must be lower-cased too;
    // otherwise the match only works on a case-insensitive database collation.
    string wordKey = word.Trim().ToLower();
    string typeKey = typeWord.Trim().ToLower();

    var candidates = db.Vocabulary_1.Where(e => e.Word.Trim().ToLower() == wordKey && e.GroupCommentId == groupCommentId);
    if (!compoundMode)
    {
        candidates = candidates.Where(e => e.TypeWord.Trim().ToLower() == typeKey);
    }

    var existing = candidates.FirstOrDefault();
    if (existing == null)
    {
        // Draw ids until one is free.
        string id = Public.GetID();
        while (db.Vocabulary_1.Any(e => e.Id == id))
        {
            id = Public.GetID();
        }

        Vocabulary_1 entry = new Vocabulary_1();
        entry.Id = id;
        entry.Word = word;
        entry.TypeWord = typeWord;
        entry.GroupCommentId = groupCommentId;
        entry.Counts = 1;
        if (compoundMode)
        {
            entry.Type = 2;
        }
        else
        {
            entry.Type = 1;
        }
        db.Vocabulary_1.Add(entry);
    }
    else
    {
        existing.Counts++;
        db.Entry(existing).State = EntityState.Modified;
    }
    db.SaveChanges();
}
/// <summary>
/// Builds a reduced version of every row in <c>db.Sentenses</c> that keeps only nouns
/// (NN, NNP, NNS) and configured core words, minus stop words, and stores it as a
/// <c>Sentensesnotword</c> row with the same Id as the source sentence.
/// Rows whose Id already exists in <c>db.Sentensesnotwords</c> are left untouched.
/// </summary>
private void standarsetenword()
{
    // Sets tolerate duplicate entries; Dictionary.Add used to throw on them.
    var stopWords = new HashSet<string>();
    foreach (var row in db.StopWords.ToList())
    {
        if (!string.IsNullOrWhiteSpace(row.StopWord1))
        {
            stopWords.Add(row.StopWord1.Trim());
        }
    }

    // Each CoreWords row holds a comma-separated list of words.
    var coreWords = new HashSet<string>();
    foreach (var row in db.CoreWords.ToList())
    {
        if (row.core_word == null)
        {
            continue;
        }

        foreach (string entry in row.core_word.Split(','))
        {
            // Lookups use trimmed tokens, so an untrimmed " word" could never match.
            string coreWord = entry.Trim();
            if (coreWord.Length > 0)
            {
                coreWords.Add(coreWord);
            }
        }
    }

    string taggerPath = System.IO.Path.Combine(Server.MapPath("~/Uploads/english-left3words"), "english-left3words-distsim.tagger");
    var tagger = new MaxentTagger(taggerPath);

    // The original guarded this loop with "Count > 1", which silently skipped a table
    // containing exactly one sentence; iterating directly handles 0, 1 and n rows alike.
    foreach (var item in db.Sentenses.ToList())
    {
        string text = item.ContentReview;
        if (string.IsNullOrWhiteSpace(text))
        {
            // Nothing to tag; a null text would otherwise crash java.io.StringReader.
            continue;
        }

        string sentenceId = item.Id;
        var sentences = MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray();
        foreach (ArrayList sentence in sentences)
        {
            var taggedSentence = tagger.tagSentence(sentence);

            // The tagged sentence prints as "[word/TAG, word/TAG, ...]".
            string[] taggedTokens = taggedSentence.ToString().Split(',', '[', ']');

            var keptWords = new List<string>();
            foreach (string token in taggedTokens)
            {
                if (token.Trim() == "")
                {
                    continue;
                }

                // Only well-formed "word/TAG" tokens are considered.
                string[] parts = token.Split('/');
                if (parts.Length != 2)
                {
                    continue;
                }

                string tag = parts[1].Trim();
                if (tag == "")
                {
                    continue;
                }

                string word = parts[0].Trim().ToLower();
                bool isNoun = tag == "NN" || tag == "NNP" || tag == "NNS";
                if ((isNoun || coreWords.Contains(word)) && !stopWords.Contains(word))
                {
                    keptWords.Add(word);
                }
            }

            string reduced = string.Join(" ", keptWords).Trim();

            // NOTE(review): the source sentence Id is reused, so when ContentReview tokenizes
            // into several sentences only the first non-empty one is stored - confirm intended.
            if (reduced != "" && !db.Sentensesnotwords.Any(e => e.Id == sentenceId))
            {
                Sentensesnotword senotopword = new Sentensesnotword();
                senotopword.Id = sentenceId;
                senotopword.ContentReview = reduced;
                senotopword.CommentId = item.CommentId;
                db.Sentensesnotwords.Add(senotopword);
                db.SaveChanges();
            }
        }
    }
}