/// <summary>
/// Click handler: for every language key, loads the training data from the database,
/// builds the raw and smoothed unigram and bigram tables, and writes all four tables
/// through a single <c>FileWriter</c>. A message box is shown after each language.
/// </summary>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void btn_buildNgramfromDB_Click(object sender, EventArgs e)
{
    // Value handed to NgramBuilder.applySmoothing for both n-gram orders.
    const double smoothingValue = 0.1;

    // The keys keep their quotes and trailing ';' on purpose: they are passed verbatim
    // to FetchFromDB.getTrainingDataFor and to the writer, exactly as before.
    // Named differently from the class-level Languages member to avoid shadowing it.
    String[] languageKeys = "'eu';,'ca';,'gl';,'es';,'en';,'pt';".Split(',');

    FileWriter FW = new FileWriter();
    try
    {
        foreach (String languageKey in languageKeys)
        {
            // Release the database connection even when the query throws.
            FetchFromDB fetchFromDatabase = new FetchFromDB();
            DataTable dataTable;
            try
            {
                dataTable = fetchFromDatabase.getTrainingDataFor(languageKey);
            }
            finally
            {
                fetchFromDatabase.closeConnection();
            }

            DataParser DP = new DataParser();
            DataTable cleanTable = DP.getCleanTable(dataTable);

            // getTotalFrequency() is read immediately after each builder call, in the
            // same order as the original code, because the builder is reused for
            // every table.
            NgramBuilder NB = new NgramBuilder();

            DataTable uniGram = NB.GetGram(cleanTable, 1);
            double uniGramN = NB.getTotalFrequency();

            DataTable smoothedUniGram = NB.applySmoothing(uniGram, smoothingValue);
            double uniGramSmoothedN = NB.getTotalFrequency();

            DataTable biGram = NB.GetGram(cleanTable, 2);
            double biGramN = NB.getTotalFrequency();

            DataTable smoothedBiGram = NB.applySmoothing(biGram, smoothingValue);
            double biGramSmoothedN = NB.getTotalFrequency();

            // The third argument is the "smoothed" flag, passed as text.
            FW.writeUniGram(uniGram, languageKey, "False", uniGramN);
            FW.writeUniGram(smoothedUniGram, languageKey, "True", uniGramSmoothedN);
            FW.writeBiGram(biGram, languageKey, "False", biGramN);
            FW.writeBiGram(smoothedBiGram, languageKey, "True", biGramSmoothedN);

            MessageBox.Show("Done " + languageKey);
        }
    }
    finally
    {
        // Always close the writer, including when a language fails part-way through.
        FW.closeWriter();
    }
}
/// <summary>
/// Click handler: for every entry in <c>Languages</c>, loads the training data from the
/// "Trainingnlp" folder, builds raw and smoothed unigram and bigram probability tables,
/// stores them in <c>gramDictionary</c> under the language key, and writes them to file.
/// Shows a single "Done " message box once all languages have been processed.
/// </summary>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void btn_probabilityNgramfromFile_Click(object sender, EventArgs e)
{
    // Value handed to NgramBuilder.applySmoothing for both n-gram orders.
    const double smoothingValue = 0.1;

    FileWriter FW = new FileWriter();
    try
    {
        for (int i = 0; i < Languages.Length; i++)
        {
            string language = Languages[i];
            LanguageObject languageModel = new LanguageObject();

            FetchFromFolderFiles fetchFromFolder = new FetchFromFolderFiles("Trainingnlp");
            DataTable dataTable = fetchFromFolder.getTrainingDataFor(language);

            DataParser DP = new DataParser();
            DataTable cleanTable = DP.getCleanTable(dataTable);

            // getTotalFrequency() is read immediately after GetGram/applySmoothing, in
            // the same order as the original code, because the builder is reused for
            // every table.
            NgramBuilder NB = new NgramBuilder();

            // Unigram, unsmoothed.
            DataTable uniGram = NB.GetGram(cleanTable, 1);
            double uniGramN = NB.getTotalFrequency();
            DataTable unSmoothedProbabilityUnigramDataTable = NB.ConvertTableToProbabilityTable(uniGram, uniGramN);
            Hashtable unSmoothedProbabilityUnigram = NB.ConvertProbTabletoHashTable(unSmoothedProbabilityUnigramDataTable);
            languageModel.setProbabilityUnigram(unSmoothedProbabilityUnigram, uniGramN);

            // Unigram, smoothed.
            DataTable smoothedUniGram = NB.applySmoothing(uniGram, smoothingValue);
            double uniGramSmoothedN = NB.getTotalFrequency();
            DataTable smoothedProbabilityUnigramDataTable = NB.ConvertTableToProbabilityTable(smoothedUniGram, uniGramSmoothedN);
            Hashtable smoothedProbabilityUnigram = NB.ConvertProbTabletoHashTable(smoothedProbabilityUnigramDataTable);
            languageModel.setSmoothedProbabilityUnigram(smoothedProbabilityUnigram, uniGramSmoothedN);

            // Bigram, unsmoothed.
            DataTable biGram = NB.GetGram(cleanTable, 2);
            double biGramN = NB.getTotalFrequency();
            DataTable unSmoothedProbabilityBigramDataTable = NB.ConvertTableToProbabilityTable(biGram, biGramN);
            Hashtable unSmoothedProbabilityBigram = NB.ConvertProbTabletoHashTable(unSmoothedProbabilityBigramDataTable);
            languageModel.setProbabilityBigram(unSmoothedProbabilityBigram, biGramN);

            // Bigram, smoothed.
            DataTable smoothedBiGram = NB.applySmoothing(biGram, smoothingValue);
            double biGramSmoothedN = NB.getTotalFrequency();
            DataTable smoothedProbabilityBigramDataTable = NB.ConvertTableToProbabilityTable(smoothedBiGram, biGramSmoothedN);
            Hashtable smoothedProbabilityBigram = NB.ConvertProbTabletoHashTable(smoothedProbabilityBigramDataTable);
            languageModel.setSmoothedProbabilityBigram(smoothedProbabilityBigram, biGramSmoothedN);

            // Publish the populated model with the indexer rather than Add(), so that
            // clicking the button again replaces the entry instead of failing on a
            // duplicate key.
            // NOTE(review): assumes gramDictionary is a standard generic dictionary
            // with a settable indexer - confirm against its declaration.
            gramDictionary[language] = languageModel;

            // The third argument is the "smoothed" flag, passed as text.
            FW.writeUniGram(unSmoothedProbabilityUnigramDataTable, language, "False", uniGramN);
            FW.writeUniGram(smoothedProbabilityUnigramDataTable, language, "True", uniGramSmoothedN);
            FW.writeNGram(unSmoothedProbabilityBigram, language, "False", biGramN, "BiGram");
            FW.writeNGram(smoothedProbabilityBigram, language, "True", biGramSmoothedN, "BiGram");

            // For a matrix representation of the bigrams, replace the two writeNGram
            // calls above with:
            // FW.writeBiGram(unSmoothedProbabilityBigramDataTable, language, "False", biGramN);
            // FW.writeBiGram(smoothedProbabilityBigramDataTable, language, "True", biGramSmoothedN);
        }
    }
    finally
    {
        // Always close the writer, including when a language fails part-way through.
        FW.closeWriter();
    }

    MessageBox.Show("Done ");
}