/// <summary>
/// Measures how often <paramref name="textModel"/> picks the expected category for the
/// labelled samples supplied by <paramref name="tdp"/>.
/// </summary>
/// <param name="textModel">Model whose <c>predict</c> method is evaluated.</param>
/// <param name="tdp">Provider of the samples; they are read per category index via
/// <c>getCatagory</c> and labelled with <c>getCatID</c>.</param>
/// <returns>
/// Number of correct predictions divided by the total number of samples. Samples that are
/// skipped because <c>same</c> returned true still count toward the total. When there are
/// no samples at all the result is <c>double.NaN</c> (0.0 / 0.0).
/// </returns>
public static double testTextModel(model textModel, textTrainDataProvider tdp)
{
    int total = 0;
    int correct = 0;

    for (int i = 0; i < tdp.data.Count; i++)
    {
        List<string[]> cat = tdp.getCatagory(i);
        string expected = tdp.getCatID(i);

        foreach (string[] sample in cat)
        {
            List<object> list = sample.ToList<object>();
            Dictionary<object, double> pred = textModel.predict(list, true);

            // Presumably `same` reports that all scores are identical, i.e. there is no
            // winning category - TODO confirm against its definition.
            if (same(pred.Values.ToList()))
            {
                continue;
            }

            // `max` is used as the position of the chosen entry within the dictionary.
            object output = pred.Keys.ElementAt(max(pred));
            if (expected.Equals(output))
            {
                correct++;
            }
        }

        // Skipped samples are included here on purpose: they are not counted as correct.
        total += cat.Count;
    }

    return (double)correct / total;
}
/// <summary>
/// Repeatedly adjusts the features of <paramref name="tm"/> on the samples of
/// <paramref name="tdp"/> until its <see cref="testTextModel"/> score reaches the goal,
/// or until the score drops compared with the previous pass.
/// </summary>
/// <remarks>
/// This will distort the data proportions. Not recommended for large data sets: it takes a
/// long time, because every pass re-predicts every sample.
/// </remarks>
/// <param name="tm">Model to train; it is changed through <c>addFeature</c> and
/// <c>removeFeature</c>.</param>
/// <param name="pg">Goal score as a percentage; it is divided by 100 before being compared
/// with the score.</param>
/// <param name="tdp">Provider of the labelled training samples.</param>
/// <param name="learnMode">When true, a misclassified sample also triggers
/// <c>removeFeature</c> on every category that is neither the predicted nor the expected
/// one.</param>
/// <param name="fineMode">When true, the first score drop switches to per-feature
/// adjustments instead of returning immediately.</param>
/// <returns>
/// The model once the goal score is reached, or the model remembered from the previous
/// pass when the score drops (after the mode switch, or right away when
/// <paramref name="fineMode"/> is false).
/// </returns>
public static model trainTextToGoal(model tm, double pg, textTrainDataProvider tdp, bool learnMode, bool fineMode)
{
    model textModel = tm;
    double percentGoal = pg / 100;
    bool mode = false; // true once training has switched to per-feature adjustments

    // NOTE(review): if `model` is a reference type, `prev` and `textModel` refer to the
    // same object, so restoring `prev` does not undo a pass - confirm whether a copy
    // was intended here.
    model prev = textModel;
    double prevScore = 0;

    // Custom numeric format with up to 339 optional decimal digits.
    string scoreFormat = "0." + new string('#', 339);

    while (true)
    {
        Console.WriteLine("[ttg]: Calculating percent score");
        double score = testTextModel(textModel, tdp);
        Console.WriteLine("[ttg]: Current percent score: " + score.ToString(scoreFormat));
        if (score.CompareTo(percentGoal) >= 0)
        {
            break;
        }

        // CompareTo (rather than '<') is kept so that a NaN score counts as a drop.
        int trend = score.CompareTo(prevScore);
        Console.WriteLine("[ttg]: compare " + trend);
        if (trend < 0)
        {
            if (mode || !fineMode)
            {
                return prev;
            }

            Console.WriteLine("[ttg]: Switching mode");
            mode = true;
            textModel = prev;
        }

        prev = textModel;
        prevScore = score;

        Console.WriteLine("[ttg]: Starting Pass");
        for (int i = 0; i < tdp.data.Count; i++)
        {
            List<string[]> cat = tdp.getCatagory(i);
            string expected = tdp.getCatID(i);

            for (int j = 0; j < cat.Count; j++)
            {
                List<object> list = cat[j].ToList<object>();
                object output = topCategoryOrNull(textModel, list);

                // Nothing to correct when there is no winner or the winner is right.
                if (output == null || expected.Equals(output))
                {
                    continue;
                }

                // The prediction was wrong: adjust the features of this sample.
                // NOTE(review): the original author suspected that addFeature and
                // removeFeature may be swapped here (possibly because of the word
                // "the"); the calls are left as they were.
                for (int k = 0; k < textModel.catagories.Count; k++)
                {
                    object catid = textModel.catagories[k].id;

                    if (output.Equals(catid))
                    {
                        // Penalize wrong output
                        for (int n = 0; n < list.Count; n++)
                        {
                            if (!mode)
                            {
                                textModel.addFeature(catid, list[n], 1);
                            }
                            else if (singleFeaturePredicts(textModel, list[n], expected))
                            {
                                textModel.addFeature(expected, list[n], 1);
                            }
                            else
                            {
                                textModel.removeFeature(catid, list[n], 1);
                            }
                        }
                    }
                    else if (expected.Equals(catid))
                    {
                        // Re-enforce correct output
                        for (int n = 0; n < list.Count; n++)
                        {
                            if (!mode)
                            {
                                textModel.removeFeature(catid, list[n], 1);
                            }
                            else if (singleFeaturePredicts(textModel, list[n], expected))
                            {
                                textModel.addFeature(expected, list[n], 1);
                            }
                            else
                            {
                                textModel.removeFeature(output, list[n], 1);
                            }
                        }
                    }
                    else if (learnMode)
                    {
                        // Not wrong but not right: penalize this category
                        for (int n = 0; n < list.Count; n++)
                        {
                            textModel.removeFeature(catid, list[n], 1);
                        }
                    }
                }
            }
        }

        Console.WriteLine("[ttg]: Pass completed");
    }

    return textModel;
}

// Returns the category chosen by the model for the given features, or null when `same`
// reports true for the prediction values (no category is selected in that case).
private static object topCategoryOrNull(model textModel, List<object> features)
{
    Dictionary<object, double> pred = textModel.predict(features, true);
    if (same(pred.Values.ToList()))
    {
        return null;
    }

    return pred.Keys.ElementAt(max(pred));
}

// Tells whether the model, given only this one feature, chooses the expected category.
// The previous code compared `expected` with the whole prediction dictionary, which can
// never be equal to a string, so the check was always false.
private static bool singleFeaturePredicts(model textModel, object feature, string expected)
{
    List<object> single = new List<object>();
    single.Add(feature);
    return expected.Equals(topCategoryOrNull(textModel, single));
}