/// <summary>
/// Reads a book's text file and tokenizes its full content into sentences.
/// </summary>
/// <param name="BookURL">Path of the text file to read.</param>
/// <returns>The sentences produced by <c>TextPreperation.Tokenize</c> for the file's content.</returns>
private List<Sentence> GetSentenceFromBook(string BookURL)
{
    // The numeric arguments (1000, 10, 3) are passed through unchanged;
    // their meaning is defined by TextPreperation.Tokenize, not here.
    return TextPreperation.Tokenize(
        File.ReadAllText(BookURL),
        1000,
        10,
        3,
        TextSplitterModes.InOrder);
}
/// <summary>
/// Loads the test/training data: reads every book of every genre, tokenizes it into
/// sentences, replaces infrequent words, maps words to integer IDs and builds one
/// labelled record per mapped sentence.
/// </summary>
/// <remarks>
/// The genre label stored in <c>Output</c> is the genre's position in the book table:
/// 0 = Adventure, 1 = Crime Fiction, 2 = Horror, 3 = Romantic Fiction, 4 = Science Fiction.
/// </remarks>
/// <returns>
/// One <c>SentenceIORecord</c> per mapped sentence, with <c>Inputs</c> holding the
/// sentence's word IDs and <c>Output</c> holding its genre label.
/// </returns>
private List<SentenceIORecord> LoadIORecords()
{
    // Book file paths grouped by genre. The index of each group is the genre ID,
    // and books are loaded in exactly this order.
    string[][] booksByGenre =
    {
        new string[]
        {
            CurrentDirectory + "\\Books\\Adventure\\Tarzan of the Apes.txt",
            CurrentDirectory + "\\Books\\Adventure\\The Lion of Petra.txt",
            CurrentDirectory + "\\Books\\Adventure\\The Scarlet Pimpernel.txt",
        },
        new string[]
        {
            CurrentDirectory + "\\Books\\Crime Fiction\\Dead Men Tell No Tales.txt",
            CurrentDirectory + "\\Books\\Crime Fiction\\Tales of Chinatown.txt",
            CurrentDirectory + "\\Books\\Crime Fiction\\The Extraordinary Adventures of Arsene Lupin.txt",
        },
        new string[]
        {
            CurrentDirectory + "\\Books\\Horror\\Ghost Stories of an Antiquary.txt",
            CurrentDirectory + "\\Books\\Horror\\Metamorphosis.txt",
            CurrentDirectory + "\\Books\\Horror\\The Wendigo.txt",
        },
        new string[]
        {
            CurrentDirectory + "\\Books\\Romantic Fiction\\Only a Girl's Love.txt",
            CurrentDirectory + "\\Books\\Romantic Fiction\\Star of India.txt",
            CurrentDirectory + "\\Books\\Romantic Fiction\\Wastralls.txt",
        },
        new string[]
        {
            CurrentDirectory + "\\Books\\Science Fiction\\Astounding Stories of Super_Science.txt",
            CurrentDirectory + "\\Books\\Science Fiction\\The Lost World.txt",
            CurrentDirectory + "\\Books\\Science Fiction\\The Sky Is Falling.txt",
        },
    };

    List<Sentence> sentences = new List<Sentence>();

    // genreEnd[g] is the exclusive end index (in `sentences`) of genre g's sentences.
    // Recording the running total right after each genre is loaded avoids deriving
    // per-genre counts by subtraction, which is where the previous version went wrong
    // (it subtracted the wrong totals for Horror, Romance and Science Fiction).
    int[] genreEnd = new int[booksByGenre.Length];
    for (int genre = 0; genre < booksByGenre.Length; genre++)
    {
        foreach (string bookPath in booksByGenre[genre])
        {
            sentences.AddRange(GetSentenceFromBook(bookPath));
        }

        genreEnd[genre] = sentences.Count;
    }

    // Remove infrequent words
    sentences = TextPreperation.RemoveInfrequentWords(sentences, NumberOfRecordsToKeep, "UNKNOWN_TOKEN");

    // Beginning/ending sentence tokens are intentionally not added; the
    // TextPreperation.AddBegginingAndEndTokens step was already disabled.

    // Map the word strings to integer values
    List<MappedSentence> mappedSentences = TextPreperation.MapSentences(sentences);

    // NOTE(review): labelling by position assumes RemoveInfrequentWords and MapSentences
    // keep one output per input sentence, in the same order - confirm against TextPreperation.
    List<SentenceIORecord> ioRecords = new List<SentenceIORecord>();
    int lastGenre = genreEnd.Length - 1;
    int genreId = 0;
    int sentenceIndex = 0; // 0-based, compared against exclusive end indices

    // For each mapped sentence create an input record
    foreach (MappedSentence mapped in mappedSentences)
    {
        // Advance to the genre that owns this index. The loop (rather than a single
        // step) also skips genres that contributed no sentences. Anything past the
        // final boundary stays in the last genre, as before.
        while (genreId < lastGenre && sentenceIndex >= genreEnd[genreId])
        {
            genreId++;
        }

        SentenceIORecord record = new SentenceIORecord();
        record.Output = genreId;
        foreach (int wordId in mapped.IDs)
        {
            record.Inputs.Add(wordId);
        }

        ioRecords.Add(record);
        sentenceIndex++;
    }

    return ioRecords;
}