예제 #1
0
        /// <summary>
        /// Counts the training rows stored for the given classification and records
        /// the result on it.
        /// </summary>
        /// <remarks>
        /// Runs a parameterized <c>COUNT</c> against the <c>Spam</c> table, restricted to
        /// rows where <c>ArticleId % 3 &gt; 0</c> and <c>Category</c> equals <c>cat.Name</c>.
        /// </remarks>
        /// <param name="cat">
        /// The <see cref="ClassifierObject"/> whose <c>Name</c> is used as the category
        /// filter and whose <c>TimesInTraining</c> receives the count.
        /// </param>
        protected override void SetTrainingCount(ClassifierObject cat)
        {
            const string sql = @"
                    SELECT	count(1) as cnt
                    FROM	Spam
                    WHERE	ArticleId % 3 > 0
                    AND		Category = @catName;
                ";

            // Both the connection and the command are disposed even if the query throws.
            using (SqlConnection conn = new SqlConnection(SqlConn.GetConn()))
            using (SqlCommand cmd = new SqlCommand(sql, conn))
            {
                cmd.Parameters.AddWithValue("@catName", cat.Name);
                conn.Open();

                // The query yields a single value, so a scalar read replaces the
                // reader loop (and leaves no undisposed SqlDataReader behind).
                object count = cmd.ExecuteScalar();

                // Leave TimesInTraining untouched when no row comes back, matching
                // the behavior of the former reader loop.
                if (count != null)
                {
                    cat.TimesInTraining = (int)count;
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Loads the <c>Body</c> column of the training rows stored for the given
        /// classification.
        /// </summary>
        /// <remarks>
        /// Queries the <c>Spam</c> table for rows where <c>ArticleId % 3 &gt; 0</c> and
        /// <c>Category</c> equals <c>cat.Name</c>.
        /// </remarks>
        /// <param name="cat">
        /// The <see cref="ClassifierObject"/> whose <c>Name</c> is used as the category filter.
        /// </param>
        /// <returns>
        /// The first table of the filled <see cref="DataSet"/>, holding one row per
        /// matching record.
        /// </returns>
        protected override DataTable GetTrainingArticles(ClassifierObject cat)
        {
            const string sql = @"
                    SELECT	Body
                    FROM	Spam
                    WHERE	ArticleId % 3 > 0
                    AND		Category = @catName;
                ";

            DataSet dSet = new DataSet();

            // Connection, command and adapter are all released even if Fill throws.
            using (SqlConnection conn = new SqlConnection(SqlConn.GetConn()))
            using (SqlCommand cmd = new SqlCommand(sql, conn))
            using (SqlDataAdapter sAdapter = new SqlDataAdapter(cmd))
            {
                cmd.Parameters.AddWithValue("@catName", cat.Name);
                conn.Open();

                // Execute command and fill dataset
                sAdapter.Fill(dSet);
            }

            // The single SELECT produces exactly one result table.
            return dSet.Tables[0];
        }
예제 #3
0
        /// <summary>
        /// Loads the <c>Body</c> column of the training articles assigned to the given
        /// classification.
        /// </summary>
        /// <remarks>
        /// Joins <c>Article</c>, <c>ArticleCategory</c>, <c>Category</c> and
        /// <c>CategoryType</c>, keeping rows where <c>LewisSplit = 'TRAIN'</c>, the
        /// category type name equals <c>cat.Name</c>, and the body is non-null,
        /// non-empty and does not match the <c>'%Shr%vs%'</c> or <c>'Qtly%vs%'</c> patterns.
        /// </remarks>
        /// <param name="cat">
        /// The <see cref="ClassifierObject"/> whose <c>Name</c> is matched against
        /// <c>CategoryType.Name</c>.
        /// </param>
        /// <returns>
        /// The first table of the filled <see cref="DataSet"/>, holding one row per
        /// matching article.
        /// </returns>
        protected override DataTable GetTrainingArticles(ClassifierObject cat)
        {
            const string sql = @"
                SELECT	Body
                FROM	Article AS a
                JOIN	ArticleCategory AS ac
                ON		a.ArticleId = ac.ArticleId
                JOIN	Category AS c
                ON		c.CategoryId = ac.CategoryId
                JOIN	CategoryType AS ct
                ON		c.CategoryTypeId = ct.CategoryTypeId
                WHERE	LewisSplit = 'TRAIN'
                AND		ct.Name = @catName
                AND     a.Body IS NOT NULL
                AND     a.Body != ''
                AND     a.Body NOT LIKE '%Shr%vs%'
                AND     a.Body NOT LIKE 'Qtly%vs%'
                ";

            DataSet dSet = new DataSet();

            // Connection, command and adapter are all released even if Fill throws.
            using (SqlConnection conn = new SqlConnection(SqlConn.GetConn()))
            using (SqlCommand cmd = new SqlCommand(sql, conn))
            using (SqlDataAdapter sAdapter = new SqlDataAdapter(cmd))
            {
                cmd.Parameters.AddWithValue("@catName", cat.Name);
                conn.Open();

                // Execute command and fill dataset
                sAdapter.Fill(dSet);
            }

            // The single SELECT produces exactly one result table.
            return dSet.Tables[0];
        }
예제 #4
0
        /// <summary>
        /// Query the Reuters data set to find the number of training articles for a
        /// classification and record it on that classification.
        /// </summary>
        /// <remarks>
        /// Counts rows from the <c>Article</c> / <c>ArticleCategory</c> / <c>Category</c> /
        /// <c>CategoryType</c> join where <c>LewisSplit = 'TRAIN'</c>, the category type
        /// name equals <c>cat.Name</c>, and the body is non-null, non-empty and does not
        /// match the <c>'%Shr%vs%'</c> or <c>'Qtly%vs%'</c> patterns.
        /// </remarks>
        /// <param name="cat">
        /// The <see cref="ClassifierObject"/> whose <c>Name</c> is matched against
        /// <c>CategoryType.Name</c> and whose <c>TimesInTraining</c> receives the count.
        /// </param>
        protected override void SetTrainingCount(ClassifierObject cat)
        {
            const string sql = @"
                SELECT	COUNT(1) AS cnt
                FROM	Article AS a
                JOIN	ArticleCategory AS ac
                ON		a.ArticleId = ac.ArticleId
                JOIN	Category AS c
                ON		c.CategoryId = ac.CategoryId
                JOIN	CategoryType AS ct
                ON		c.CategoryTypeId = ct.CategoryTypeId
                WHERE	LewisSplit = 'TRAIN'
                AND		ct.Name = @catName
                AND     a.Body IS NOT NULL
                AND     a.Body != ''
                AND     a.Body NOT LIKE '%Shr%vs%'
                AND     a.Body NOT LIKE 'Qtly%vs%'
                ";

            // Both the connection and the command are disposed even if the query throws.
            using (SqlConnection conn = new SqlConnection(SqlConn.GetConn()))
            using (SqlCommand cmd = new SqlCommand(sql, conn))
            {
                cmd.Parameters.AddWithValue("@catName", cat.Name);
                conn.Open();

                // The query yields a single value, so a scalar read replaces the
                // reader loop (and leaves no undisposed SqlDataReader behind).
                object count = cmd.ExecuteScalar();

                // Leave TimesInTraining untouched when no row comes back, matching
                // the behavior of the former reader loop.
                if (count != null)
                {
                    cat.TimesInTraining = (int)count;
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Rebuilds <c>this.classifiers</c> from the persisted training XML document.
        /// </summary>
        /// <remarks>
        /// The array is sized from the number of entries returned by
        /// <c>NaiveClassifier.ReadXml()</c>. While reading the document, each
        /// <c>classifier</c> element starts a new <see cref="ClassifierObject"/>, and the
        /// text of <c>name</c>, <c>training_count</c>, <c>text</c>, <c>probability</c> and
        /// <c>times_seen</c> elements is applied to the current classifier or to its most
        /// recently added word.
        /// Any exception is written to the console and not rethrown, so
        /// <c>this.classifiers</c> may be left partially populated after a failure.
        /// </remarks>
        private void BuildTrainingObjects()
        {
            try
            {
                // Read the categories.xml file and determine how many categories are defined
                Dictionary<string, double> definedCategories = NaiveClassifier.ReadXml();
                this.classifiers = new ClassifierObject[definedCategories.Count];

                // State needed to keep track of the position in the XML document
                int classifierCount = -1;   // index of the classifier currently being filled
                string testString = "";     // name of the most recently opened element
                string tempWord = "";       // text of the most recently read word

                // Dispose the reader so the underlying stream is released even when
                // parsing fails part-way through.
                using (XmlTextReader reader = new XmlTextReader(NaiveClassifier.GetTrainingFilestring()))
                {
                    while (reader.Read())
                    {
                        if (reader.NodeType == XmlNodeType.Element)
                        {
                            testString = reader.Name;
                            if (testString == "classifier")
                            {
                                classifierCount++;
                                this.classifiers[classifierCount] = new ClassifierObject("");
                            }
                            else if (testString == "word")
                            {
                                tempWord = "";
                            }
                        }
                        else if (reader.NodeType == XmlNodeType.Text)
                        {
                            ClassifierObject current = this.classifiers[classifierCount];

                            if (testString == "name")
                            {
                                current.Name = reader.Value;
                            }
                            else if (testString == "training_count")
                            {
                                current.TimesInTraining = Convert.ToInt32(reader.Value);
                            }
                            else if (testString == "text")
                            {
                                tempWord = reader.Value;
                                current.AddWord(tempWord, "");
                            }
                            else if (testString == "probability")
                            {
                                // NOTE(review): Convert.ToDouble uses the current culture;
                                // confirm the file is written with the same culture.
                                WordObject wordObj = current.GetWord(tempWord);
                                wordObj.SetProb(Convert.ToDouble(reader.Value));
                            }
                            else if (testString == "times_seen")
                            {
                                WordObject wordObj = current.GetWord(tempWord);
                                wordObj.TimesInTraining = Convert.ToInt32(reader.Value);
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort load: report the failure and carry on.
                Console.WriteLine(ex.ToString());
            }
        }
예제 #6
0
 /// <summary>
 /// Abstract hook that derived classes implement to set the training count for a
 /// classification.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably implementations store the number of training samples
 /// for <paramref name="cat"/> in its <c>TimesInTraining</c> member; confirm against
 /// the concrete overrides.
 /// </remarks>
 /// <param name="cat">The <see cref="ClassifierObject"/> to update.</param>
 protected abstract void SetTrainingCount(ClassifierObject cat);
예제 #7
0
 /// <summary>
 /// Abstract hook that derived classes implement to supply the training articles
 /// for a classification.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably each row of the returned table carries the text of one
 /// training article for <paramref name="cat"/>; confirm the expected columns
 /// against the concrete overrides and the caller.
 /// </remarks>
 /// <param name="cat">The <see cref="ClassifierObject"/> to fetch articles for.</param>
 /// <returns>A <see cref="DataTable"/> produced by the derived class.</returns>
 protected abstract DataTable GetTrainingArticles(ClassifierObject cat);