/// <summary>
/// Click handler for the analyse button. When a file has been uploaded it calls
/// <c>Utilities.learn()</c>, extracts text from the upload according to its
/// extension (.txt, .doc/.docx or .html) and passes that text to
/// <c>Utilities.analiseTxt</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
/// <remarks>
/// Uploads with any other extension are ignored after <c>Utilities.learn()</c> has run.
/// Extensions are matched case-insensitively so that e.g. ".TXT" is accepted.
/// </remarks>
protected void analiseBtn_Click(object sender, EventArgs e)
{
    if (!FileUpload1.HasFile)
    {
        return;
    }

    Utilities.learn();

    string postedName = this.FileUpload1.PostedFile.FileName;
    string ext = System.IO.Path.GetExtension(postedName);
    string text = null;

    if (string.Equals(ext, ".txt", StringComparison.OrdinalIgnoreCase))
    {
        // Plain text: read the upload stream directly.
        using (StreamReader sr = new StreamReader(FileUpload1.PostedFile.InputStream))
        {
            text = sr.ReadToEnd();
        }
    }
    else if (string.Equals(ext, ".doc", StringComparison.OrdinalIgnoreCase)
          || string.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase))
    {
        // NOTE(review): the virtual path literal ends with a space; kept as-is - confirm it is intended.
        string uploadsDirectory = System.Web.HttpContext.Current.Server.MapPath("/Doc_Uploads ");

        // The file name comes from the client and may contain directory parts
        // (some browsers send the full client path, and it can be forged).
        // Keep only the leaf name so the upload cannot be written outside the mapped directory.
        string safeName = System.IO.Path.GetFileName(postedName);
        string docPath = System.Web.HttpContext.Current.Server.MapPath(safeName);
        FileUpload1.PostedFile.SaveAs(docPath);

        // Convert the document to a text file on disk, then read that file back.
        // NOTE(review): the output name is fixed, so concurrent requests share the same file.
        string textPath = System.IO.Path.Combine(uploadsDirectory, "ToText.txt");
        Document document = new Document();
        document.LoadFromFile(docPath);
        document.SaveToFile(textPath, FileFormat.Txt);

        using (StreamReader sr = new StreamReader(textPath))
        {
            text = sr.ReadToEnd();
        }
    }
    else if (string.Equals(ext, ".html", StringComparison.OrdinalIgnoreCase))
    {
        // HTML: run the content through HtmlRemoval.StripTagsRegexCompiled before analysis.
        using (StreamReader sr = new StreamReader(FileUpload1.PostedFile.InputStream))
        {
            text = HtmlRemoval.StripTagsRegexCompiled(sr.ReadToEnd());
        }
    }

    if (text != null)
    {
        // NOTE(review): the result is not consumed anywhere in this handler.
        AnalisisResult res = Utilities.analiseTxt(text);
    }
}
/// <summary>
/// Classifies a text by language and by category using per-word lookups in the
/// <c>languageWord</c> and <c>categoryWord</c> tables, then forwards the words
/// that matched nothing to <c>insertLearningWords</c>.
/// </summary>
/// <param name="FileContent">Text to analyse; it is split into words on runs of non-word characters.</param>
/// <returns>
/// An <c>AnalisisResult</c> whose <c>LangsWords</c> / <c>categsWords</c> hold one node per
/// language / category seen (with hit count <c>cant</c> and computed <c>prob</c>), and whose
/// <c>langResult</c> / <c>categResult</c> hold the value with the highest probability
/// (empty string when nothing matched).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="FileContent"/> is null.</exception>
public static AnalisisResult analiseTxt(String FileContent)
{
    if (FileContent == null)
    {
        throw new ArgumentNullException("FileContent");
    }

    var con = ConfigurationManager.ConnectionStrings["BayesAIDBConnectionString"].ToString();
    AnalisisResult result = new AnalisisResult();

    // "\W+" splits on every run of non-word characters. Regex.Split yields empty
    // entries when the text starts or ends with a separator; those are not words,
    // so drop them instead of querying for them and "learning" them.
    string[] words = Regex.Split(FileContent, @"\W+").Where(w => w.Length > 0).ToArray();

    List<string> nonUsedWords = new List<string>();

    // using-blocks guarantee the connection, command and readers are released
    // even when a query throws.
    using (SqlConnection myConnection = new SqlConnection(con))
    using (SqlCommand queryCmd = new SqlCommand("Select * from languageWord where fk_word = @pWord", myConnection))
    {
        queryCmd.Parameters.Add("@pWord", System.Data.SqlDbType.VarChar);
        myConnection.Open();

        // Language classifier: count, per language, how many rows the words hit.
        foreach (string word in words)
        {
            queryCmd.Parameters["@pWord"].Value = word;
            bool wordUsed = false;

            using (SqlDataReader queryReader = queryCmd.ExecuteReader())
            {
                while (queryReader.Read())
                {
                    wordUsed = true;
                    string lang = queryReader["fk_lang"].ToString();

                    // Find the existing tally node for this language, if any.
                    trickNode match = null;
                    foreach (trickNode candidate in result.LangsWords)
                    {
                        if (candidate.val == lang)
                        {
                            match = candidate;
                            break;
                        }
                    }

                    if (match == null)
                    {
                        result.LangsWords.Add(new trickNode(lang, 1, 0.0));
                    }
                    else
                    {
                        match.cant++;
                    }
                }
            }

            // NOTE(review): unlike the category pass below, this pass does not
            // de-duplicate, so a repeated unknown word is recorded once per occurrence.
            if (!wordUsed)
            {
                nonUsedWords.Add(word);
            }
        }

        // Category classifier: same tally, against categoryWord.
        queryCmd.CommandText = "Select * from categoryWord where fk_word = @pWord";
        foreach (string word in words)
        {
            queryCmd.Parameters["@pWord"].Value = word;
            bool wordUsed = false;

            using (SqlDataReader queryReader = queryCmd.ExecuteReader())
            {
                while (queryReader.Read())
                {
                    wordUsed = true;
                    string categ = queryReader["fk_categ"].ToString();

                    trickNode match = null;
                    foreach (trickNode candidate in result.categsWords)
                    {
                        if (candidate.val == categ)
                        {
                            match = candidate;
                            break;
                        }
                    }

                    if (match == null)
                    {
                        result.categsWords.Add(new trickNode(categ, 1, 0.0));
                    }
                    else
                    {
                        match.cant++;
                    }
                }
            }

            if (!wordUsed && !nonUsedWords.Contains(word))
            {
                nonUsedWords.Add(word);
            }
        }

        // Language analysis: prob = hits for the language / hits for all languages;
        // the language with the highest prob wins.
        int totalLangWords = 0;
        foreach (trickNode node in result.LangsWords)
        {
            totalLangWords += node.cant;
        }

        string resLang = "";
        double currProb = 0.0;
        foreach (trickNode node in result.LangsWords)
        {
            node.prob = (double)node.cant / totalLangWords;
            if (currProb < node.prob)
            {
                currProb = node.prob;
                resLang = node.val;
            }
        }
        result.langResult = resLang;

        // Bayesian category analysis.
        // Prior: number of categoryWord rows stored for each category seen,
        // collected in the same order as result.categsWords.
        List<int> dbProbs = new List<int>();
        queryCmd.CommandText = "select count(*) cnt from (select fk_word w from categoryWord where fk_categ = @pWord) as alias";
        foreach (trickNode node in result.categsWords)
        {
            queryCmd.Parameters["@pWord"].Value = node.val;
            // The query returns a single row with a single column, so ExecuteScalar suffices.
            dbProbs.Add(Convert.ToInt32(queryCmd.ExecuteScalar()));
        }

        // Posterior: (category rows / all rows) * (hits / category rows);
        // the category with the highest value wins.
        int count = 0;
        int totalPreviousWords = dbProbs.Sum();
        string resCateg = "";
        currProb = 0.0;
        foreach (trickNode node in result.categsWords)
        {
            node.prob = ((double)dbProbs[count] / totalPreviousWords) * ((double)node.cant / dbProbs[count]);
            if (currProb < node.prob)
            {
                currProb = node.prob;
                resCateg = node.val;
            }
            count++;
        }
        result.categResult = resCateg;

        // Send the words that matched nothing, together with the chosen category
        // and language, to the learning step (called while the connection is still
        // open, as before).
        insertLearningWords(nonUsedWords, result.categResult, result.langResult);
    }

    return result;
}