// Builds unigram and bigram frequency tables (raw and smoothed with k = 0.1)
// for each configured language and writes all of them through one shared
// FileWriter, which is closed after the last language.
// NOTE(review): each language token carries a trailing ';' (e.g. "'eu';") —
// presumably consumed verbatim by the SQL built inside FetchFromDB; confirm
// against getTrainingDataFor before normalising these strings.
private void btn_buildNgramfromDB_Click(object sender, EventArgs e) {
    String[] Languages = "'eu';,'ca';,'gl';,'es';,'en';,'pt';".Split(',');
    FileWriter FW = new FileWriter();
    for (int i = 0; i < Languages.Length; i++) {
        // One short-lived DB connection per language; closed as soon as the
        // training rows are in memory.
        FetchFromDB fetchFromDatabase = new FetchFromDB();
        DataTable dataTable = fetchFromDatabase.getTrainingDataFor(Languages[i]);
        fetchFromDatabase.closeConnection();

        // Fix: the original allocated `new DataTable()` for every result
        // variable and immediately overwrote it — dead allocations removed.
        DataParser DP = new DataParser();
        DataTable cleanTable = DP.getCleanTable(dataTable);

        NgramBuilder NB = new NgramBuilder();

        // Unigrams: raw counts, then Lidstone-style smoothing with k = 0.1.
        // getTotalFrequency() reflects the most recent build, so it is read
        // immediately after each step.
        DataTable uniGram = NB.GetGram(cleanTable, 1);
        double uniGramN = NB.getTotalFrequency();
        DataTable smoothedUniGram = NB.applySmoothing(uniGram, 0.1);
        double uniGramSmoothedN = NB.getTotalFrequency();

        // Bigrams: identical pipeline with n = 2.
        DataTable biGram = NB.GetGram(cleanTable, 2);
        double biGramN = NB.getTotalFrequency();
        DataTable smoothedBiGram = NB.applySmoothing(biGram, 0.1);
        double BiGramSmoothedN = NB.getTotalFrequency();

        // "False"/"True" tells the writer whether smoothing was applied.
        FW.writeUniGram(uniGram, Languages[i], "False", uniGramN);
        FW.writeUniGram(smoothedUniGram, Languages[i], "True", uniGramSmoothedN);
        FW.writeBiGram(biGram, Languages[i], "False", biGramN);
        FW.writeBiGram(smoothedBiGram, Languages[i], "True", BiGramSmoothedN);

        MessageBox.Show("Done " + Languages[i]);
    }
    FW.closeWriter();
}
// Classifies every lyric in each genre's raw table (test split) with the
// trained Naive Bayes model, accumulates a confusion matrix, stores the
// row-normalised percentage matrix in the DB, and opens the report form.
public void testFromDB() {
    UtilityFunctions utilityFunction = new UtilityFunctions();
    // Vocabulary keys are the known training tokens; only those words are
    // fed to the classifier below.
    Hashtable Vocabulary = utilityFunction.TableToHashSumAdded((new FetchFromDB()).getVocabulary());

    DBProgressBar.ForeColor = Color.Blue;
    DBProgressBar.Value = 0;
    DBProgressBar.Visible = true;

    // Fix: classifier and parser do not depend on the loop variable, so they
    // are built once instead of being re-created on every genre iteration.
    NaiveBayesClassifier NB = new NaiveBayesClassifier(freqTables);
    RegexDataParser RDP = new RegexDataParser();

    for (int i = 0; i < freqTables.Length; i++) {
        FetchFromDB fetchFromDb = new FetchFromDB();
        // Second argument 0 presumably selects the held-out/test rows
        // (trainFromDB passes 1) — TODO confirm against FetchFromDB.
        DataTable dataTable = fetchFromDb.getTrainingDataFor(rawTables[i], 0);
        foreach (DataRow row in dataTable.Rows) {
            foreach (var item in row.ItemArray) {
                String Lyrics = RDP.StripTagsRegexCompiled(item.ToString());
                String[] lyric = Lyrics.Split(' ');

                // Keep only tokens present in the training vocabulary.
                List<string> containinginVocab = new List<string>();
                for (int l = 0; l < lyric.Length; l++) {
                    if (Vocabulary.ContainsKey(lyric[l]))
                        containinginVocab.Add(lyric[l]);
                }

                Double[] classification = NB.ApplyBayes(containinginVocab.ToArray());
                int indx = NB.getMaxConfidence(classification);
                // Row = actual genre, column = predicted genre.
                confusionMatrix[i, indx] = confusionMatrix[i, indx] + 1;
            }
        }
        DBProgressBar.Value = DBProgressBar.Value + 100 / freqTables.Length;
    }

    Double[] rowFrequency = utilityFunction.getRowCount(confusionMatrix);
    Double[,] PercentageMatrix = new Double[rawTables.Length, rawTables.Length];
    // Fix: the original hard-coded 5 as the loop bound while the matrix is
    // dimensioned by rawTables.Length — use the same length everywhere so the
    // two cannot drift apart if the genre list changes.
    for (int i = 0; i < rawTables.Length; i++)
        for (int j = 0; j < rawTables.Length; j++)
            PercentageMatrix[i, j] = (confusionMatrix[i, j] * 100) / rowFrequency[i];

    String[] Genres = { "Rap", "Country", "Religion", "Rock", "Reggae" };
    InsertIntoDB insertMatrix = new InsertIntoDB();
    insertMatrix.insertConfusionMatrix(PercentageMatrix, Genres);

    ReportForm rf = new ReportForm();
    rf.Show();
}
// For every raw training table, aggregates word frequencies via
// TableToHashSumAdded and persists them into the matching frequency table,
// advancing the progress bar by an equal step per genre.
public void trainFromDB() {
    DBProgressBar.ForeColor = Color.Blue;
    int step = 100 / rawTables.Length; // integer step, same as original per-iteration expression
    for (int genre = 0; genre < rawTables.Length; genre++) {
        FetchFromDB fetcher = new FetchFromDB();
        // Second argument 1 selects the training rows (testFromDB passes 0).
        DataTable rawRows = fetcher.getTrainingDataFor(rawTables[genre], 1);
        Hashtable frequencies = new UtilityFunctions().TableToHashSumAdded(rawRows);
        new InsertIntoDB().InsertDataFor(freqTables[genre], frequencies);
        DBProgressBar.Value += step;
    }
}