示例#1
0
        /// <summary>
        /// Click handler for the analyse button. When a file has been uploaded it
        /// triggers <c>Utilities.learn()</c>, extracts plain text from the upload
        /// (.txt, .doc/.docx or .html) and passes that text to
        /// <c>Utilities.analiseTxt</c>. Uploads with any other extension are ignored.
        /// </summary>
        /// <param name="sender">Control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void analiseBtn_Click(object sender, EventArgs e)
        {
            if (!FileUpload1.HasFile)
            {
                return;
            }

            Utilities.learn();

            // The posted file name is client-controlled; keep only its file-name
            // part so it can never steer where the upload is written.
            string fileName = Path.GetFileName(this.FileUpload1.PostedFile.FileName);
            string ext      = Path.GetExtension(fileName);
            string text;

            // Extensions are compared case-insensitively so ".TXT" / ".DOCX" work too.
            if (string.Equals(ext, ".txt", StringComparison.OrdinalIgnoreCase))
            {
                using (StreamReader sr = new StreamReader(FileUpload1.PostedFile.InputStream))
                {
                    text = sr.ReadToEnd();
                }
            }
            else if (string.Equals(ext, ".doc", StringComparison.OrdinalIgnoreCase) ||
                     string.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase))
            {
                // NOTE(review): the virtual path literal ends with a space; kept as-is,
                // confirm it matches the real folder name.
                string uploadsDirectory = System.Web.HttpContext.Current.Server.MapPath("/Doc_Uploads ");

                // Store the upload inside the uploads folder under its bare file name
                // instead of mapping the client-supplied name into the site tree.
                string docPath = Path.Combine(uploadsDirectory, fileName);
                // NOTE(review): fixed output name is shared by all requests.
                string txtPath = uploadsDirectory + "\\" + "ToText.txt";

                FileUpload1.PostedFile.SaveAs(docPath);

                // Convert the document to plain text on disk, then read it back.
                Document document = new Document();
                document.LoadFromFile(docPath);
                document.SaveToFile(txtPath, FileFormat.Txt);

                using (StreamReader sr = new StreamReader(txtPath))
                {
                    text = sr.ReadToEnd();
                }
            }
            else if (string.Equals(ext, ".html", StringComparison.OrdinalIgnoreCase))
            {
                using (StreamReader sr = new StreamReader(FileUpload1.PostedFile.InputStream))
                {
                    text = HtmlRemoval.StripTagsRegexCompiled(sr.ReadToEnd());
                }
            }
            else
            {
                // Unsupported extension: nothing to analyse.
                return;
            }

            // NOTE(review): the returned AnalisisResult is not used by this handler;
            // the call is kept because analiseTxt also records learning words.
            Utilities.analiseTxt(text);
        }
示例#2
0
        /// <summary>
        /// Classifies a text by language and by category using the
        /// <c>languageWord</c> and <c>categoryWord</c> tables, then forwards every
        /// word that was missing from either table to <c>insertLearningWords</c>
        /// together with the winning category and language.
        /// </summary>
        /// <param name="FileContent">Plain text to analyse; <c>null</c> is treated as empty.</param>
        /// <returns>
        /// The result with per-language and per-category counts and probabilities
        /// filled in, plus <c>langResult</c> and <c>categResult</c> (empty strings
        /// when nothing matched).
        /// </returns>
        public static AnalisisResult analiseTxt(String FileContent)
        {
            var            con    = ConfigurationManager.ConnectionStrings["BayesAIDBConnectionString"].ToString();
            AnalisisResult result = new AnalisisResult();

            // Split on runs of non-word characters. Regex.Split yields empty tokens
            // when the text starts or ends with a separator; drop them so "" is
            // never queried or recorded as a learning word.
            string[] words = Regex.Split(FileContent ?? string.Empty, @"\W+")
                                  .Where(w => w.Length > 0)
                                  .ToArray();

            // Words missing from either table, in first-seen order and without
            // duplicates (the set only tracks membership).
            List <string>    nonUsedWords = new List <string>();
            HashSet <string> nonUsedSeen  = new HashSet <string>();

            // Number of categoryWord rows per category, index-aligned with
            // result.categsWords.
            List <int> dbProbs = new List <int>();

            using (SqlConnection myConnection = new SqlConnection(con))
            using (SqlCommand queryCmd = new SqlCommand("Select * from languageWord where fk_word = @pWord", myConnection))
            {
                queryCmd.Parameters.Add("@pWord", System.Data.SqlDbType.VarChar);
                myConnection.Open();

                //Language Classifier: one count per (word occurrence, language) row.
                foreach (string word in words)
                {
                    List <string> langs = readMatchesForWord(queryCmd, word, "fk_lang");
                    foreach (string lang in langs)
                    {
                        trickNode known = result.LangsWords.FirstOrDefault(n => n.val == lang);
                        if (known == null)
                        {
                            result.LangsWords.Add(new trickNode(lang, 1, 0.0));
                        }
                        else
                        {
                            known.cant++;
                        }
                    }
                    if (langs.Count == 0 && nonUsedSeen.Add(word))
                    {
                        nonUsedWords.Add(word);
                    }
                }

                //Category Classifier: same tally against categoryWord.
                queryCmd.CommandText = "Select * from categoryWord where fk_word = @pWord";
                foreach (string word in words)
                {
                    List <string> categs = readMatchesForWord(queryCmd, word, "fk_categ");
                    foreach (string categ in categs)
                    {
                        trickNode known = result.categsWords.FirstOrDefault(n => n.val == categ);
                        if (known == null)
                        {
                            result.categsWords.Add(new trickNode(categ, 1, 0.0));
                        }
                        else
                        {
                            known.cant++;
                        }
                    }
                    if (categs.Count == 0 && nonUsedSeen.Add(word))
                    {
                        nonUsedWords.Add(word);
                    }
                }

                //Bayesian Category Analisis
                //Previous Probability: how many rows each matched category has.
                queryCmd.CommandText = "select count(*) cnt from (select fk_word w from categoryWord where fk_categ = @pWord) as alias";
                foreach (trickNode node in result.categsWords)
                {
                    queryCmd.Parameters["@pWord"].Value = node.val;
                    dbProbs.Add(Convert.ToInt32(queryCmd.ExecuteScalar()));
                }
            }

            //Language Analisis: share of matches per language; first highest wins.
            int totalLangWords = 0;
            foreach (trickNode node in result.LangsWords)
            {
                totalLangWords += node.cant;
            }

            string resLang  = "";
            double currProb = 0.0;
            foreach (trickNode node in result.LangsWords)
            {
                node.prob = (double)node.cant / totalLangWords;
                if (currProb < node.prob)
                {
                    currProb = node.prob;
                    resLang  = node.val;
                }
            }
            result.langResult = resLang;

            //Calculate Posteriori Probability
            int    count = 0;
            int    totalPreviousWords = dbProbs.Sum();
            string resCateg           = "";

            currProb = 0.0;
            foreach (trickNode node in result.categsWords)
            {
                // (rows of this category / rows of all matched categories)
                //   * (matches in the text / rows of this category)
                node.prob = ((double)dbProbs[count] / totalPreviousWords) * ((double)node.cant / dbProbs[count]);
                if (currProb < node.prob)
                {
                    currProb = node.prob;
                    resCateg = node.val;
                }
                count++;
            }
            result.categResult = resCateg;

            //Send Learning Words
            insertLearningWords(nonUsedWords, result.categResult, result.langResult);
            return(result);
        }

        /// <summary>
        /// Binds <paramref name="word"/> to the command's <c>@pWord</c> parameter,
        /// executes it and returns the value of <paramref name="column"/> from every
        /// row, in reader order. The reader is always disposed.
        /// </summary>
        private static List <string> readMatchesForWord(SqlCommand queryCmd, string word, string column)
        {
            List <string> values = new List <string>();

            queryCmd.Parameters["@pWord"].Value = word;
            using (SqlDataReader queryReader = queryCmd.ExecuteReader())
            {
                while (queryReader.Read())
                {
                    values.Add(queryReader[column].ToString());
                }
            }
            return values;
        }