/// <summary>
/// Add a training document to the network
/// </summary>
/// <param name="trainingDoc">The training doc to add to the network</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="trainingDoc"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when no party data exists for the category of <paramref name="trainingDoc"/>.
/// </exception>
public void AddTrainingDoc(TrainingDoc trainingDoc)
{
    if (trainingDoc == null)
    {
        throw new ArgumentNullException(nameof(trainingDoc));
    }

    Category cat = trainingDoc.Category;

    // Get partyData of same category as the TrainingDoc. Fail with a descriptive
    // message rather than the generic "Sequence contains no matching element".
    PartyData partyData = Data.FirstOrDefault(x => x.GetCategory() == cat);
    if (partyData == null)
    {
        throw new InvalidOperationException($"No party data exists for category {cat}.");
    }

    partyData.IncrementDocCount();

    // Recalculate prior probabilities as document count has changed
    Dictionary<Category, double> priorProbabilities = _calculator.GetPriorProbabilities(Data);

    foreach (KeyValuePair<Category, double> kvp in priorProbabilities)
    {
        if (kvp.Key == Category.NONE)
        {
            continue;
        }

        // A category without matching party data has nowhere to store its
        // probability, so it is skipped instead of dereferencing null.
        PartyData party = Data.FirstOrDefault(x => x.GetCategory() == kvp.Key);
        if (party != null)
        {
            party.SetProbability(kvp.Value);
        }
    }

    // Merge the existing word set with the new document word set
    partyData.SetWords(MergeWords(partyData.Words, trainingDoc.Words));

    // Merge the existing ngram set with the new document ngram set
    partyData.SetNGrams(MergeWords(partyData.NGrams, trainingDoc.NGrams));

    // Recalculate relative frequencies as training set has been modified.
    // NOTE(review): the return values are discarded, so this presumably updates
    // the passed collections in place - confirm against the calculator.
    _calculator.GetRelativeFrequencies(partyData.Words);
    _calculator.GetRelativeFrequencies(partyData.NGrams);
}
/// <summary>
/// Assign the party to the document
/// </summary>
/// <remarks>
/// Keeps prompting until a listed option or Escape is pressed, so an
/// unrecognised key can never produce a placeholder document.
/// </remarks>
/// <param name="fileName">The name of the file to assign the party to</param>
/// <returns>
/// A <see cref="TrainingDoc"/> built from <paramref name="fileName"/> and the chosen
/// category, or <c>null</c> if the user pressed Escape.
/// </returns>
private TrainingDoc AssignDocumentParty(string fileName)
{
    // Single source of truth for the menu: option N (1-based) is parties[N - 1].
    Category[] parties = { Category.CONSERVATIVE, Category.COALITION, Category.LABOUR };

    Console.Clear();
    Console.WriteLine("Choose the party this document belongs to:\n");

    for (int i = 0; i < parties.Length; i++)
    {
        Console.WriteLine($"{i + 1}. {parties[i]}");
    }

    Console.Write("Enter an option: ");

    while (true)
    {
        ConsoleKey pressed = Console.ReadKey().Key;

        if (pressed == ConsoleKey.Escape)
        {
            return null;
        }

        // ConsoleKey.D1..D9 are consecutive values, so the offset from D1 is
        // the zero-based index of the chosen option.
        int index = pressed - ConsoleKey.D1;
        if (index >= 0 && index < parties.Length)
        {
            return new TrainingDoc(fileName, parties[index]);
        }

        // Move off the line holding the echoed key, report, and ask again.
        Console.WriteLine();
        Console.WriteLine("Invalid Input!");
        Console.Write("Enter an option: ");
    }
}
/// <summary>
/// Train the network with new data
/// </summary>
/// <remarks>
/// Repeats "choose document, train, offer to save" until the user either goes
/// back from the document menu or chooses to save. After saving, control is
/// handed to <c>Classify()</c>.
/// </remarks>
private void Train()
{
    while (true)
    {
        TrainingDoc doc = PromptForTrainingDoc();
        if (doc == null)
        {
            // User chose to go back (or input ended).
            return;
        }

        _network.AddTrainingDoc(doc);
        Console.WriteLine("Trained Network with \"{0}\"", doc.FileName);
        Console.ReadLine();
        Console.Clear();

        if (PromptSaveNetwork())
        {
            _network.Save();
            Classify();
            return;
        }

        // "No" was chosen: loop round and train with another document.
    }
}

/// <summary>
/// Show the training-document menu until a document and party are chosen.
/// </summary>
/// <returns>
/// The chosen training document, or <c>null</c> if the user typed "00" or the
/// input stream ended.
/// </returns>
private TrainingDoc PromptForTrainingDoc()
{
    while (true)
    {
        Console.Clear();
        Console.WriteLine("Choose a document to train the network with\n");

        // Re-read the directory on every pass so the menu reflects its current contents.
        string[] docs = Directory.GetFiles("TrainingDocs");

        for (int i = 0; i < docs.Length; i++)
        {
            // Path.GetFileName strips the directory on any platform, unlike
            // replacing a hard-coded "TrainingDocs\" prefix.
            Console.WriteLine($"{i + 1}. {Path.GetFileName(docs[i])}");
        }

        Console.Write("Choose an option (Type 00 to go back): ");
        string docChoice = Console.ReadLine();

        // ReadLine returns null at end of input; treat that as "go back"
        // instead of prompting forever.
        if (docChoice == null || docChoice == "00")
        {
            return null;
        }

        // TryParse rejects both malformed and out-of-range numbers without throwing.
        if (!int.TryParse(docChoice, out int choice))
        {
            Console.WriteLine("Input was not a valid number. Try a number instead!");
            Console.ReadLine();
            continue;
        }

        if (choice < 1 || choice > docs.Length)
        {
            Console.WriteLine("Invalid option chosen!");
            Console.ReadLine();
            continue;
        }

        TrainingDoc doc = AssignDocumentParty(docs[choice - 1]);
        if (doc != null)
        {
            return doc;
        }

        // Party selection was cancelled: show the document menu again.
    }
}

/// <summary>
/// Ask whether the network should be saved, repeating until 1 or 2 is pressed.
/// </summary>
/// <returns><c>true</c> if the user chose "Yes"; <c>false</c> if "No".</returns>
private bool PromptSaveNetwork()
{
    while (true)
    {
        Console.WriteLine("Save the network to file?");
        Console.WriteLine("1. Yes\n2. No\nHit enter after you have made your choice");

        switch (Console.ReadKey().Key)
        {
            case ConsoleKey.D1:
                return true;
            case ConsoleKey.D2:
                return false;
        }
    }
}