/// <summary>
/// Queries the Spam table for the number of rows belonging to the given
/// category and stores that count in <c>cat.TimesInTraining</c>.
/// </summary>
/// <remarks>
/// Only rows where <c>ArticleId % 3 &gt; 0</c> are counted (presumably the
/// training portion of the data set; confirm against the test-side query).
/// </remarks>
/// <param name="cat">
/// A <see cref="ClassifierObject"/>; its <c>Name</c> is bound to the
/// <c>@catName</c> parameter and its <c>TimesInTraining</c> is overwritten.
/// </param>
protected override void SetTrainingCount(ClassifierObject cat)
{
    string sql = @" SELECT count(1) as cnt FROM Spam WHERE ArticleId % 3 > 0 AND Category = @catName; ";

    // The command and reader are disposed deterministically, even when the
    // query or the cast below throws.
    using (SqlConnection conn = new SqlConnection(SqlConn.GetConn()))
    using (SqlCommand cmd = new SqlCommand(sql, conn))
    {
        SqlParameter param = new SqlParameter();
        param.ParameterName = "@catName";
        param.Value = cat.Name;
        cmd.Parameters.Add(param);

        conn.Open();

        using (SqlDataReader reader = cmd.ExecuteReader())
        {
            // The aggregate yields a single row; its first column is the count.
            while (reader.Read())
            {
                cat.TimesInTraining = (int)reader[0];
            }
        }
    }
}
/// <summary>
/// Loads the <c>Body</c> column of every Spam row that belongs to the given
/// category and satisfies <c>ArticleId % 3 &gt; 0</c>.
/// </summary>
/// <param name="cat">
/// A <see cref="ClassifierObject"/>; its <c>Name</c> is bound to the
/// <c>@catName</c> parameter of the query.
/// </param>
/// <returns>
/// The first table of the filled <see cref="DataSet"/>, containing one
/// <c>Body</c> column.
/// </returns>
protected override DataTable GetTrainingArticles(ClassifierObject cat)
{
    string sql = @" SELECT Body FROM Spam WHERE ArticleId % 3 > 0 AND Category = @catName; ";
    DataSet dSet = new DataSet();

    // The command and adapter are disposed together with the connection,
    // even when Fill throws.
    using (SqlConnection conn = new SqlConnection(SqlConn.GetConn()))
    using (SqlCommand cmd = new SqlCommand(sql, conn))
    using (SqlDataAdapter sAdapter = new SqlDataAdapter(cmd))
    {
        SqlParameter param = new SqlParameter();
        param.ParameterName = "@catName";
        param.Value = cat.Name;
        cmd.Parameters.Add(param);

        conn.Open();

        // Execute the command and fill the data set.
        sAdapter.Fill(dSet);
    }

    // The single SELECT produces exactly one result table.
    return dSet.Tables[0];
}
/// <summary>
/// Loads the <c>Body</c> of every article whose <c>LewisSplit</c> is
/// <c>'TRAIN'</c> and whose category type name matches the given classifier.
/// </summary>
/// <remarks>
/// Articles with a NULL or empty body are skipped, as are bodies matching
/// the patterns <c>'%Shr%vs%'</c> and <c>'Qtly%vs%'</c>.
/// </remarks>
/// <param name="cat">
/// A <see cref="ClassifierObject"/>; its <c>Name</c> is bound to the
/// <c>@catName</c> parameter and compared with <c>CategoryType.Name</c>.
/// </param>
/// <returns>
/// The first table of the filled <see cref="DataSet"/>, containing one
/// <c>Body</c> column.
/// </returns>
protected override DataTable GetTrainingArticles(ClassifierObject cat)
{
    // Compile-time concatenation; the resulting text is identical to the
    // original single-literal query.
    const string sql =
        " SELECT Body FROM Article AS a" +
        " JOIN ArticleCategory AS ac ON a.ArticleId = ac.ArticleId" +
        " JOIN Category AS c ON c.CategoryId = ac.CategoryId" +
        " JOIN CategoryType AS ct ON c.CategoryTypeId = ct.CategoryTypeId" +
        " WHERE LewisSplit = 'TRAIN'" +
        " AND ct.Name = @catName" +
        " AND a.Body IS NOT NULL" +
        " AND a.Body != ''" +
        " AND a.Body NOT LIKE '%Shr%vs%'" +
        " AND a.Body NOT LIKE 'Qtly%vs%' ";

    DataSet dSet = new DataSet();

    // The command and adapter are disposed together with the connection,
    // even when Fill throws.
    using (SqlConnection conn = new SqlConnection(SqlConn.GetConn()))
    using (SqlCommand cmd = new SqlCommand(sql, conn))
    using (SqlDataAdapter sAdapter = new SqlDataAdapter(cmd))
    {
        SqlParameter param = new SqlParameter();
        param.ParameterName = "@catName";
        param.Value = cat.Name;
        cmd.Parameters.Add(param);

        conn.Open();

        // Execute the command and fill the data set.
        sAdapter.Fill(dSet);
    }

    // The single SELECT produces exactly one result table.
    return dSet.Tables[0];
}
/// <summary>
/// Counts the articles whose <c>LewisSplit</c> is <c>'TRAIN'</c> and whose
/// category type name matches the given classifier, and stores the count in
/// <c>cat.TimesInTraining</c>.
/// </summary>
/// <remarks>
/// Articles with a NULL or empty body are excluded, as are bodies matching
/// the patterns <c>'%Shr%vs%'</c> and <c>'Qtly%vs%'</c>.
/// </remarks>
/// <param name="cat">
/// A <see cref="ClassifierObject"/>; its <c>Name</c> is bound to the
/// <c>@catName</c> parameter and its <c>TimesInTraining</c> is overwritten.
/// </param>
protected override void SetTrainingCount(ClassifierObject cat)
{
    // Compile-time concatenation; the resulting text is identical to the
    // original single-literal query.
    const string sql =
        " SELECT COUNT(1) AS cnt FROM Article AS a" +
        " JOIN ArticleCategory AS ac ON a.ArticleId = ac.ArticleId" +
        " JOIN Category AS c ON c.CategoryId = ac.CategoryId" +
        " JOIN CategoryType AS ct ON c.CategoryTypeId = ct.CategoryTypeId" +
        " WHERE LewisSplit = 'TRAIN'" +
        " AND ct.Name = @catName" +
        " AND a.Body IS NOT NULL" +
        " AND a.Body != ''" +
        " AND a.Body NOT LIKE '%Shr%vs%'" +
        " AND a.Body NOT LIKE 'Qtly%vs%' ";

    // The command and reader are disposed deterministically, even when the
    // query or the cast below throws.
    using (SqlConnection conn = new SqlConnection(SqlConn.GetConn()))
    using (SqlCommand cmd = new SqlCommand(sql, conn))
    {
        SqlParameter param = new SqlParameter();
        param.ParameterName = "@catName";
        param.Value = cat.Name;
        cmd.Parameters.Add(param);

        conn.Open();

        using (SqlDataReader reader = cmd.ExecuteReader())
        {
            // The aggregate yields a single row; its first column is the count.
            while (reader.Read())
            {
                cat.TimesInTraining = (int)reader[0];
            }
        }
    }
}
/// <summary>
/// Rebuilds <c>this.classifiers</c> from the training XML file returned by
/// <c>NaiveClassifier.GetTrainingFilestring()</c>.
/// </summary>
/// <remarks>
/// The array is sized from the number of entries returned by
/// <c>NaiveClassifier.ReadXml()</c>. Each <c>classifier</c> element starts a
/// new <see cref="ClassifierObject"/>; the text of <c>name</c>,
/// <c>training_count</c>, <c>text</c>, <c>probability</c> and
/// <c>times_seen</c> elements is applied to the current classifier or to the
/// word most recently read. Any exception is written to the console and
/// swallowed, so a failure can leave the array partially populated.
/// </remarks>
private void BuildTrainingObjects()
{
    try
    {
        // Read the categories.xml file and determine how many categories are defined
        Dictionary<string, double> definedCategories = NaiveClassifier.ReadXml();
        this.classifiers = new ClassifierObject[definedCategories.Count];

        // The reader holds the training file open; dispose it on every exit
        // path, including when parsing throws into the catch below.
        using (XmlTextReader reader = new XmlTextReader(NaiveClassifier.GetTrainingFilestring()))
        {
            // State needed to keep track of the position in the XML document
            int classifierCount = -1;   // index of the classifier being populated
            string currentElement = ""; // name of the most recently opened element
            string tempWord = "";       // word whose statistics are being read

            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element)
                {
                    currentElement = reader.Name;

                    if (reader.Name == "classifier")
                    {
                        classifierCount++;
                        this.classifiers[classifierCount] = new ClassifierObject("");
                    }
                    else if (reader.Name == "word")
                    {
                        tempWord = "";
                    }
                }
                else if (reader.NodeType == XmlNodeType.Text)
                {
                    // Text nodes are interpreted according to the element
                    // that was opened last.
                    if (currentElement == "name")
                    {
                        this.classifiers[classifierCount].Name = reader.Value;
                    }
                    else if (currentElement == "training_count")
                    {
                        this.classifiers[classifierCount].TimesInTraining = Convert.ToInt32(reader.Value);
                    }
                    else if (currentElement == "text")
                    {
                        tempWord = reader.Value;
                        this.classifiers[classifierCount].AddWord(tempWord, "");
                    }
                    else if (currentElement == "probability")
                    {
                        WordObject wordObj = this.classifiers[classifierCount].GetWord(tempWord);
                        wordObj.SetProb(Convert.ToDouble(reader.Value));
                    }
                    else if (currentElement == "times_seen")
                    {
                        WordObject wordObj = this.classifiers[classifierCount].GetWord(tempWord);
                        wordObj.TimesInTraining = Convert.ToInt32(reader.Value);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort load: report the problem and continue.
        Console.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Determines how many training articles exist for the given classifier.
/// </summary>
/// <remarks>
/// NOTE(review): the overrides visible in this source run a COUNT query
/// filtered by <c>cat.Name</c> and write the result to
/// <c>cat.TimesInTraining</c>; other overrides should be checked against
/// that behavior.
/// </remarks>
/// <param name="cat">
/// The <see cref="ClassifierObject"/> to update.
/// </param>
protected abstract void SetTrainingCount(ClassifierObject cat);
/// <summary>
/// Retrieves the training articles for the given classifier.
/// </summary>
/// <remarks>
/// NOTE(review): the overrides visible in this source select a single
/// <c>Body</c> column filtered by <c>cat.Name</c>; other overrides should be
/// checked against that shape.
/// </remarks>
/// <param name="cat">
/// The <see cref="ClassifierObject"/> whose training articles are requested.
/// </param>
/// <returns>
/// A <see cref="DataTable"/> holding the matching articles.
/// </returns>
protected abstract DataTable GetTrainingArticles(ClassifierObject cat);