/// <summary>
/// Classifies the test tweets of every language with the unigram and the bigram
/// Naive Bayes model, tallies the predictions in the two labeling matrices and
/// writes one "TweetID predicted-language" line per tweet to the
/// "results-unigram" and "results-bigram" writers.
/// </summary>
/// <param name="sender">Control that raised the click event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btn_testDatafromFiles_Click(object sender, EventArgs e)
{
    // Training tweet counts handed to the classifier.
    // NOTE(review): presumably one entry per language, in the same order as Languages - confirm.
    int[] countOfTrainingTweetforLanguage = { 374, 1493, 456, 12855, 971, 2169 };

    FileWriter uniGramResultWriter = null;
    FileWriter biGramResultWriter = null;
    try
    {
        uniGramResultWriter = new FileWriter("results-unigram");
        biGramResultWriter = new FileWriter("results-bigram");

        StringBuilder builderUniResult = new StringBuilder();
        StringBuilder builderBiResult = new StringBuilder();
        builderUniResult.Append("TweetID" + " " + "Likely Language").Append("\n");
        builderBiResult.Append("TweetID" + " " + "Likely Language").Append("\n");

        // Reset the tallies left over from a previous run. The bound is tied to the
        // training-count array (6 entries) instead of a bare literal.
        int matrixSize = countOfTrainingTweetforLanguage.Length;
        for (int row = 0; row < matrixSize; row++)
        {
            for (int col = 0; col < matrixSize; col++)
            {
                labelingUniMatrixFrequency[row, col] = 0.0;
                labelingBiMatrixFrequency[row, col] = 0.0;
            }
        }

        for (int i = 0; i < Languages.Length; i++)
        {
            FetchFromFolderFiles fetchFromFolder = new FetchFromFolderFiles("Testingnlp");
            Hashtable languageTweetsClean = fetchFromFolder.getTestingDataFor(Languages[i]);

            // NOTE(review): gramDictionary and totalTweet are fields filled elsewhere;
            // presumably the n-gram probabilities must be built before this handler runs - confirm.
            NaiveBayesClassifier classifier =
                new NaiveBayesClassifier(countOfTrainingTweetforLanguage, totalTweet, gramDictionary);

            foreach (DictionaryEntry entry in languageTweetsClean)
            {
                string tweetId = entry.Key.ToString();
                string tweetText = entry.Value.ToString();

                // Row = language the tweet was fetched for, column = index picked by the classifier.
                Double[] uniConfidence = classifier.ApplyBayesOnUnigram(tweetText);
                int uniWinner = classifier.getMaxConfidence(uniConfidence);
                labelingUniMatrixFrequency[i, uniWinner] = labelingUniMatrixFrequency[i, uniWinner] + 1;
                builderUniResult.Append(tweetId + " " + Languages[uniWinner]);
                builderUniResult.Append("\n");

                Double[] biConfidence = classifier.ApplyBayesOnBigram(tweetText);
                int biWinner = classifier.getMaxConfidence(biConfidence);
                labelingBiMatrixFrequency[i, biWinner] = labelingBiMatrixFrequency[i, biWinner] + 1;
                builderBiResult.Append(tweetId + " " + Languages[biWinner]);
                builderBiResult.Append("\n");
            }
        }

        uniGramResultWriter.resultsWriter(builderUniResult.ToString());
        biGramResultWriter.resultsWriter(builderBiResult.ToString());
    }
    finally
    {
        // Close the writers even when fetching or classification throws, so the
        // underlying outputs are not left open. Close order matches the original.
        if (uniGramResultWriter != null)
        {
            uniGramResultWriter.closeAnalysisWriter();
        }
        if (biGramResultWriter != null)
        {
            biGramResultWriter.closeAnalysisWriter();
        }
    }

    // Only reached when everything above succeeded.
    MessageBox.Show("Done");
}
/// <summary>
/// For each of the six language codes, loads the training data, cleans it, builds
/// the unigram and bigram tables (plain and smoothed) and writes all four tables
/// through a single <c>FileWriter</c>. A message box is shown after each language.
/// </summary>
/// <param name="sender">Control that raised the click event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btn_buildNgramfromFile_Click(object sender, EventArgs e)
{
    // Second argument passed to NgramBuilder.applySmoothing for both n-gram orders.
    const double SmoothingValue = 0.1;

    // Local list of language codes (named differently from the class-level Languages
    // field so the two cannot be confused).
    string[] languageCodes = "eu,ca,gl,es,en,pt".Split(',');

    FileWriter writer = new FileWriter();
    try
    {
        foreach (string language in languageCodes)
        {
            FetchFromFolderFiles fetchFromFolder = new FetchFromFolderFiles("Trainingnlp");
            DataTable dataTable = fetchFromFolder.getTrainingDataFor(language);

            DataParser parser = new DataParser();
            DataTable cleanTable = parser.getCleanTable(dataTable);

            // getTotalFrequency() is read immediately after every builder call and the
            // call order is kept exactly as before.
            // NOTE(review): presumably it reports the total of the most recent table - confirm.
            NgramBuilder ngramBuilder = new NgramBuilder();

            DataTable uniGram = ngramBuilder.GetGram(cleanTable, 1);
            double uniGramN = ngramBuilder.getTotalFrequency();

            DataTable smoothedUniGram = ngramBuilder.applySmoothing(uniGram, SmoothingValue);
            double uniGramSmoothedN = ngramBuilder.getTotalFrequency();

            DataTable biGram = ngramBuilder.GetGram(cleanTable, 2);
            double biGramN = ngramBuilder.getTotalFrequency();

            DataTable smoothedBiGram = ngramBuilder.applySmoothing(biGram, SmoothingValue);
            double biGramSmoothedN = ngramBuilder.getTotalFrequency();

            // "False" accompanies the plain table, "True" the smoothed one.
            // NOTE(review): if applySmoothing modifies its input table in place, the
            // "False" outputs below would contain smoothed values - verify in NgramBuilder.
            writer.writeUniGram(uniGram, language, "False", uniGramN);
            writer.writeUniGram(smoothedUniGram, language, "True", uniGramSmoothedN);
            writer.writeBiGram(biGram, language, "False", biGramN);
            writer.writeBiGram(smoothedBiGram, language, "True", biGramSmoothedN);

            MessageBox.Show("Done " + language);
        }
    }
    finally
    {
        // Release the writer even when loading, parsing or writing fails midway.
        writer.closeWriter();
    }
}
/// <summary>
/// For every entry of <c>Languages</c>, builds the unigram and bigram probability
/// tables (plain and smoothed) from the training data, stores them in
/// <c>gramDictionary</c> under the language key and writes them through a
/// <c>FileWriter</c>. Shows a single message box when all languages are done.
/// </summary>
/// <param name="sender">Control that raised the click event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btn_probabilityNgramfromFile_Click(object sender, EventArgs e)
{
    // Second argument passed to NgramBuilder.applySmoothing for both n-gram orders.
    const double SmoothingValue = 0.1;

    FileWriter writer = new FileWriter();
    try
    {
        for (int i = 0; i < Languages.Length; i++)
        {
            string language = Languages[i];

            // Indexer assignment instead of Add(): Add throws on an existing key, which
            // made a second click (or a retry after a failed run) crash. Assigning
            // replaces any previous model with a fresh one.
            gramDictionary[language] = new LanguageObject();

            FetchFromFolderFiles fetchFromFolder = new FetchFromFolderFiles("Trainingnlp");
            DataTable dataTable = fetchFromFolder.getTrainingDataFor(language);

            DataParser parser = new DataParser();
            DataTable cleanTable = parser.getCleanTable(dataTable);

            // getTotalFrequency() is read immediately after every GetGram/applySmoothing
            // call and the call order is kept exactly as before.
            // NOTE(review): presumably it reports the total of the most recent table - confirm.
            NgramBuilder ngramBuilder = new NgramBuilder();

            // Unigram, plain.
            DataTable uniGram = ngramBuilder.GetGram(cleanTable, 1);
            double uniGramN = ngramBuilder.getTotalFrequency();
            DataTable unSmoothedProbabilityUnigramDataTable =
                ngramBuilder.ConvertTableToProbabilityTable(uniGram, uniGramN);
            Hashtable unSmoothedProbabilityUnigram =
                ngramBuilder.ConvertProbTabletoHashTable(unSmoothedProbabilityUnigramDataTable);
            gramDictionary[language].setProbabilityUnigram(unSmoothedProbabilityUnigram, uniGramN);

            // Unigram, smoothed.
            DataTable smoothedUniGram = ngramBuilder.applySmoothing(uniGram, SmoothingValue);
            double uniGramSmoothedN = ngramBuilder.getTotalFrequency();
            DataTable smoothedProbabilityUnigramDataTable =
                ngramBuilder.ConvertTableToProbabilityTable(smoothedUniGram, uniGramSmoothedN);
            Hashtable smoothedProbabilityUnigram =
                ngramBuilder.ConvertProbTabletoHashTable(smoothedProbabilityUnigramDataTable);
            gramDictionary[language].setSmoothedProbabilityUnigram(smoothedProbabilityUnigram, uniGramSmoothedN);

            // Bigram, plain.
            DataTable biGram = ngramBuilder.GetGram(cleanTable, 2);
            double biGramN = ngramBuilder.getTotalFrequency();
            DataTable unSmoothedProbabilityBigramDataTable =
                ngramBuilder.ConvertTableToProbabilityTable(biGram, biGramN);
            Hashtable unSmoothedProbabilityBigram =
                ngramBuilder.ConvertProbTabletoHashTable(unSmoothedProbabilityBigramDataTable);
            gramDictionary[language].setProbabilityBigram(unSmoothedProbabilityBigram, biGramN);

            // Bigram, smoothed.
            DataTable smoothedBiGram = ngramBuilder.applySmoothing(biGram, SmoothingValue);
            double biGramSmoothedN = ngramBuilder.getTotalFrequency();
            DataTable smoothedProbabilityBigramDataTable =
                ngramBuilder.ConvertTableToProbabilityTable(smoothedBiGram, biGramSmoothedN);
            Hashtable smoothedProbabilityBigram =
                ngramBuilder.ConvertProbTabletoHashTable(smoothedProbabilityBigramDataTable);
            gramDictionary[language].setSmoothedProbabilityBigram(smoothedProbabilityBigram, biGramSmoothedN);

            // "False" accompanies the plain table, "True" the smoothed one.
            writer.writeUniGram(unSmoothedProbabilityUnigramDataTable, language, "False", uniGramN);
            writer.writeUniGram(smoothedProbabilityUnigramDataTable, language, "True", uniGramSmoothedN);

            // Bigrams are written from the hashtables. For a matrix representation, call
            // writeBiGram with the two bigram probability DataTables instead of these two lines.
            writer.writeNGram(unSmoothedProbabilityBigram, language, "False", biGramN, "BiGram");
            writer.writeNGram(smoothedProbabilityBigram, language, "True", biGramSmoothedN, "BiGram");
        }
    }
    finally
    {
        // Release the writer even when loading, conversion or writing fails midway.
        writer.closeWriter();
    }

    // Only reached when every language was processed successfully.
    MessageBox.Show("Done ");
}