// Test step: deletes the LocalParse records built from the generated titles for the
// sample article, then deletes the GlobalParse records with the same WordContent.
// Returns the accumulated log text; every failed delete also increments errorCount.
private static string deleteRecord()
{
    string log = "deleteRecord\n";

    // Pass 1: LocalParse records, one per title.
    for (int idx = 0; idx < count; idx++)
    {
        LocalParse localRow = new LocalParse(sampleArticle.ArticleId, titles[idx], "n", sampleArticle.ArticleId + 1);
        bool removed = LocalParseManager.deleteRecord(localRow);
        if (removed)
        {
            log += "Ok! 删除WordContent为\"" + titles[idx] + "\"的LocalParse记录成功!\n";
        }
        else
        {
            log += "Error! 删除WordContent为\"" + titles[idx] + "\"的LocalParse记录失败!\n";
            errorCount++;
        }
    }

    // Pass 2: GlobalParse records. Only failures are logged here.
    for (int idx = 0; idx < count; idx++)
    {
        GlobalParse globalRow = new GlobalParse();
        globalRow.WordContent = titles[idx];
        if (GlobalParseManager.deleteRecord(globalRow))
        {
            continue;
        }

        log += "Error! 删除WordContent为\"" + titles[idx] + "\"的GlobalParse记录失败!测试无法继续进行。请先解决GlobalParseManager中的错误。\n";
        errorCount++;
    }

    return log;
}
// Deletes the globalParse row(s) whose wordContent equals gp.WordContent.
// The statement references only the globalParse table, and only the WordContent
// field of the argument is read.
// Intended to be called by test functions only.
// Returns whatever DBHelper.ExecSQL reports for the command.
public static bool deleteRecord(GlobalParse gp)
{
    SqlCommand deleteCommand = new SqlCommand("DELETE FROM globalParse WHERE wordContent=@wContent");
    deleteCommand.Parameters.AddWithValue("@wContent", gp.WordContent);

    bool succeeded = DBHelper.ExecSQL(deleteCommand);
    return succeeded;
}
// Inserts one row into globalParse built from gp.WordContent, gp.Type and gp.ArticleNumber.
// Note: the caller is responsible for setting ArticleNumber; this method only stores it.
// Returns the result of SqlManager.ExecSQL (per the original note: true on success,
// false on failure).
public static bool addRecord(GlobalParse gp)
{
    // The statement is assembled as text, so single quotes inside the values must be
    // doubled. Otherwise a word such as "don't" terminates the literal early and the
    // statement is broken (or can be hijacked by crafted input).
    // A null value is treated as empty, which is what plain concatenation produced.
    string wordContent = (gp.WordContent ?? "").Replace("'", "''");
    string type = (gp.Type ?? "").Replace("'", "''");
    int articleNumber = gp.ArticleNumber;

    string sqlStr = "INSERT INTO globalParse (wordContent, type, articleNumber) VALUES('"
                    + wordContent + "','" + type + "'," + articleNumber + ")";
    return SqlManager.ExecSQL(sqlStr);
}
// Updates the globalParse row whose wordContent equals gp.WordContent, setting its
// type and articleNumber from gp.
// Returns the result of SqlManager.ExecSQL (per the original note: true on success,
// false on failure).
public static bool updateRecord(GlobalParse gp)
{
    // Values are embedded in the SQL text, so single quotes are doubled to keep each
    // literal intact; unescaped quotes would break the statement or let crafted input
    // change which rows are updated.
    // A null value is treated as empty, matching plain concatenation.
    string wordContent = (gp.WordContent ?? "").Replace("'", "''");
    string type = (gp.Type ?? "").Replace("'", "''");
    int articleNumber = gp.ArticleNumber;

    // Shape: UPDATE <table> SET <column>='<value>' WHERE <key>='<value>'
    string sqlStr = "UPDATE globalParse SET type='" + type + "', articleNumber=" + articleNumber
                    + " WHERE wordContent='" + wordContent + "'";
    return SqlManager.ExecSQL(sqlStr);
}
// Looks up the globalParse row whose wordContent equals gp.WordContent (only that
// field of the argument is read for the query).
// Returns null when no row matches. Otherwise fills gp.Type and gp.ArticleNumber from
// the first matching row and returns that same gp instance.
public static GlobalParse selectRecordByWordContent(GlobalParse gp)
{
    // The key is embedded in the SQL text, so single quotes are doubled; an unescaped
    // quote would break the query or let crafted input alter the WHERE clause.
    // A null key is treated as empty, matching plain concatenation.
    string wordContent = (gp.WordContent ?? "").Replace("'", "''");
    string sqlStr = "SELECT * FROM globalParse WHERE wordContent='" + wordContent + "'";

    DataSet dataset = SqlManager.GetDataSet(sqlStr, "globalParse");
    DataRowCollection rows = dataset.Tables[0].Rows;
    if (rows.Count == 0)
    {
        return null;
    }

    DataRow first = rows[0];
    gp.Type = first["type"].ToString();
    gp.ArticleNumber = Convert.ToInt32(first["articleNumber"]);
    return gp;
}
// Test step: deletes the GlobalParse record for every generated title and logs the
// outcome of each attempt. Returns the log text; failures increment errorCount.
private static string deleteRecord()
{
    string log = "deleteRecord\n";

    for (int idx = 0; idx < count; idx++)
    {
        GlobalParse target = new GlobalParse();
        target.WordContent = titles[idx];

        bool removed = GlobalParseManager.deleteRecord(target);
        if (removed)
        {
            log += "Ok! 删除WordContent为\"" + titles[idx] + "\"的GlobalParse记录成功!\n";
        }
        else
        {
            log += "Error! 删除WordContent为\"" + titles[idx] + "\"的GlobalParse记录失败!\n";
            errorCount++;
        }
    }

    return log;
}
// Test step: fetches the GlobalParse record for every generated title and logs
// whether a record came back. A null result counts as an error.
// Returns the log text.
private static string selectRecordByWordContent()
{
    string log = "selectRecordByWordContent\n";

    for (int idx = 0; idx < count; idx++)
    {
        GlobalParse probe = new GlobalParse();
        probe.WordContent = titles[idx];

        GlobalParse found = GlobalParseManager.selectRecordByWordContent(probe);
        if (found != null)
        {
            log += "Ok! 通过WordContent获取GlobalParse记录成功!\n";
            continue;
        }

        log += "Error! 通过WordContent获取GlobalParse记录失败!返回值为空。\n";
        errorCount++;
    }

    return log;
}
// Test step: for every generated title, updates the GlobalParse record to type "v",
// reads it back and checks that the stored type is "v".
// Returns the log text; each failure increments errorCount. A failed update call
// ends the whole step immediately, while a failed read-back only skips that title.
private static string updateRecord()
{
    string log = "updateRecord\n";

    for (int idx = 0; idx < count; idx++)
    {
        GlobalParse row = new GlobalParse(titles[idx], "v", sampleArticle.ArticleId);

        bool updated = GlobalParseManager.updateRecord(row);
        if (!updated)
        {
            log += "Error! 调用updateRecord对WordContent为\"" + titles[idx] + "\"的记录失败!返回值为false。\n";
            errorCount++;
            return log;
        }
        log += "Ok! 调用updateRecord对WordContent为\"" + titles[idx] + "\"的记录成功!返回值为true。\n";

        // Read the row back to verify what was actually stored.
        row = GlobalParseManager.selectRecordByWordContent(row);
        if (row == null)
        {
            log += "Error! 通过WordContent\"" + titles[idx] + "\"查询记录失败!返回值为空。\n";
            errorCount++;
            continue;
        }

        if (row.Type.Equals("v"))
        {
            log += "Ok! 对WordContent为\"" + titles[idx] + "\"的记录更新Type字段成功!Type被更改为" + row.Type + "。\n";
        }
        else
        {
            log += "Error! 对WordContent为\"" + titles[idx] + "\"的记录更新Type字段失败!Type被更改为" + row.Type + "。\n";
            errorCount++;
        }
    }

    return log;
}
// Test step: generates a random numeric title for each slot, inserts a GlobalParse
// record for it, then inserts a LocalParse record for the sample article under the
// same title. Returns the log text; each failed insert increments errorCount.
// GlobalParse inserts are logged only on failure.
private static string addRecord()
{
    string output = "addRecord\n";
    Random r = new Random();

    // Add the GlobalParse records first.
    for (int i = 0; i < count; i++)
    {
        int no = r.Next(1000, 9000);
        titles[i] = no + "";
        GlobalParse gp = new GlobalParse(titles[i], "n", sampleArticle.ArticleId);
        if (!GlobalParseManager.addRecord(gp))
        {
            // The failing call is GlobalParseManager.addRecord, so the message names that
            // manager (it previously pointed at PrimaryGroupManager, which is not involved).
            output += "Error! 新增GlobalParse记录\"" + titles[i] + "\"失败!测试无法继续进行。请先解决GlobalParseManager中的错误。\n";
            errorCount++;
        }
    }

    // Then add one LocalParse record per title for the sample article.
    for (int i = 0; i < count; i++)
    {
        LocalParse lp = new LocalParse(sampleArticle.ArticleId, titles[i], "n", sampleArticle.ArticleId);
        if (!LocalParseManager.addRecord(lp))
        {
            output += "Error! 为id是\"" + sampleArticle.ArticleId + "\"的样例文章新增wordContent为\"" + titles[i] + "\"的记录失败!\n";
            errorCount++;
        }
        else
        {
            output += "Ok! 为id是\"" + sampleArticle.ArticleId + "\"的样例文章新增wordContent为\"" + titles[i] + "\"的记录成功!\n";
        }
    }

    return output;
}
// Test step: fills every title slot with a random number in [1000, 9000) rendered as
// text and inserts a GlobalParse record of type "n" for it, logging each outcome.
// Returns the log text; failures increment errorCount.
private static string addRecord()
{
    string log = "addRecord\n";
    Random rng = new Random();

    for (int idx = 0; idx < count; idx++)
    {
        titles[idx] = rng.Next(1000, 9000) + "";

        GlobalParse row = new GlobalParse(titles[idx], "n", sampleArticle.ArticleId);
        bool inserted = GlobalParseManager.addRecord(row);
        if (inserted)
        {
            log += "Ok! 新增记录\"" + titles[idx] + "\"成功!\n";
        }
        else
        {
            log += "Error! 新增记录\"" + titles[idx] + "\"失败!\n";
            errorCount++;
        }
    }

    return log;
}
/*
 * Parses a newly added article and assigns it to a primary group.
 *
 * Input:  an Article model instance and its content string.
 * Output: none. The original specification asked for a status code (0 = success,
 *         -1..-5 = different kinds of failure), but this method returns void.
 *
 * What the method does:
 * 1. Segments the content with stringParse and counts how often each word occurs,
 *    skipping words found in stop_list.
 * 2. Stores the number of parsed tokens in a.WordCount (on the model instance only;
 *    nothing here writes it to the article table).
 * 3. For each distinct word, inserts a GlobalParse record with articleNumber = 1.
 *    If the insert fails, reads the existing record and stores it back with
 *    articleNumber + 1.
 * 4. For each distinct word, inserts a LocalParse record for this article whose
 *    count is the word's frequency within the article.
 * 5. Scores every primary group by summing the weights of its keywords that occur in
 *    the article, and stores the group chosen by max(...) in Article2Group.
 *    NOTE(review): the original specification says an article matching no group
 *    should fall into groupId 0; confirm that max(...) yields that id when all
 *    scores are zero.
 *
 * The weight is currently term frequency only (count / number of distinct words);
 * the IDF factor of the intended TF-IDF formula is disabled.
 *
 * Usage (per the original note): private helper called only by addArticleWrapper
 * after a new article has been added to the database.
 */
private static void parseArticle(Article a, string content)
{
    // Read the keyword list of every primary group up front; the list is index-aligned
    // with allGroup.
    List<string[]> allGroup = PrimaryGroupMananger.getAllGroups();
    List<List<string>> allGroupKeywordList = new List<List<string>>();
    foreach (string[] ag in allGroup)
    {
        PrimaryGroups g = new PrimaryGroups();
        g.GroupId = Convert.ToInt32(ag[0]);
        g.GroupName = ag[1];
        allGroupKeywordList.Add(PrimaryGroupKeyWordsManager.getKeyWordsOfCertainPrimaryGroup(g));
    }

    List<String[]> parseList = stringParse(content);
    a.WordCount = parseList.Count;

    // Frequency of every non-stop word; element 0 of each parsed entry is the word.
    Dictionary<String, Int32> dic = new Dictionary<String, Int32>();
    foreach (String[] parsed in parseList)
    {
        String word = parsed[0];
        if (stop_list.Contains(word))
        {
            continue;
        }

        int seen;
        dic.TryGetValue(word, out seen); // leaves seen at 0 for a word not yet counted
        dic[word] = seen + 1;
    }

    // Number of distinct words; used as the denominator of the term frequency.
    int wordMount = dic.Count;

    int articleID = a.ArticleId;
    Dictionary<String, Double> tf_idf = new Dictionary<String, Double>();
    foreach (KeyValuePair<String, Int32> keyPair in dic)
    {
        GlobalParse tempGP = new GlobalParse();
        tempGP.ArticleNumber = 1;
        tempGP.WordContent = keyPair.Key;
        tempGP.Type = "q";

        if (!GlobalParseManager.addRecord(tempGP))
        {
            // A failed insert is treated as "the word already exists". If the lookup finds
            // nothing either, the insert failed for another reason; skip this word rather
            // than crash on a null record and abandon the whole article.
            GlobalParse existing = GlobalParseManager.selectRecordByWordContent(tempGP);
            if (existing == null)
            {
                continue;
            }

            tempGP.ArticleNumber = existing.ArticleNumber + 1;
            GlobalParseManager.updateRecord(tempGP);
        }

        // Re-read the stored record; skip the word if it cannot be read back.
        tempGP = GlobalParseManager.selectRecordByWordContent(tempGP);
        if (tempGP == null)
        {
            continue;
        }

        LocalParse tempLP = new LocalParse();
        tempLP.ArticleId = articleID;
        tempLP.WordContent = tempGP.WordContent;
        tempLP.Count = keyPair.Value;
        tempLP.Type = "q";
        LocalParseManager.addRecord(tempLP);

        // Term frequency only. The IDF factor is disabled, so the total article count
        // (previously fetched once per word and never used) is no longer queried.
        int tf_fenzi = tempLP.Count;
        double tf_idf_value = ((double)tf_fenzi) / wordMount;
        tf_idf.Add(keyPair.Key, tf_idf_value);
    }

    // One score per group; a new double[] starts out as all zeros.
    double[] answer = new double[allGroup.Count];
    for (int i = 0; i < answer.Length; i++)
    {
        List<string> keywords = allGroupKeywordList[i];
        if (keywords == null)
        {
            continue;
        }

        foreach (String keyword in keywords)
        {
            double weight;
            if (tf_idf.TryGetValue(keyword, out weight))
            {
                answer[i] += weight;
            }
        }
    }

    Article2Group a2g = new Article2Group();
    a2g.ArticleId = a.ArticleId;
    a2g.GroupId = max(answer);
    Article2GroupManager.addRecord(a2g);
}