/// <summary>
/// Randomly picks sentences to compare against <paramref name="firstSentence"/> and
/// fills in the comparison features on each picked sentence.
/// </summary>
/// <param name="allSentences">Pool of candidate sentences to draw from.</param>
/// <param name="firstSentence">
/// Sentence whose classification is copied into each pick's
/// <c>prevSentenceClassification</c>.
/// </param>
/// <param name="numAdjacentExamples">Number of sentences to pick.</param>
/// <param name="rand">Random source used to choose indexes into the pool.</param>
/// <returns>
/// The picked sentences. Empty when no sentences are requested or the pool is empty.
/// </returns>
/// <remarks>
/// The returned items are the same instances held in <paramref name="allSentences"/>;
/// they are mutated in place, and all of them receive the same list instance of
/// adjacent classifications.
/// </remarks>
public List<SentenceExample> GenerateNextSentences(List<SentenceExample> allSentences, SentenceExample firstSentence, int numAdjacentExamples, Random rand)
{
    var nextSentences = new List<SentenceExample>();

    // Nothing requested, or nothing to draw from. Without this guard an empty pool
    // makes rand.Next(0, 0) return 0 and the indexer throw ArgumentOutOfRangeException.
    if (numAdjacentExamples <= 0 || allSentences.Count == 0)
    {
        return nextSentences;
    }

    // Duplicates are rejected only when the pool is larger than the request;
    // otherwise sentences are drawn with replacement.
    // NOTE(review): if the pool itself contains repeated entries, the retry loop
    // below may not terminate when fewer distinct sentences exist than requested.
    bool rejectDuplicates = allSentences.Count > numAdjacentExamples;
    var adjacentClassifications = new List<int>();

    while (nextSentences.Count < numAdjacentExamples)
    {
        var candidate = allSentences[rand.Next(0, allSentences.Count)];

        if (rejectDuplicates && nextSentences.Contains(candidate))
        {
            // Already picked: draw again.
            continue;
        }

        adjacentClassifications.Add(candidate.classification);
        nextSentences.Add(candidate);
    }

    // Fill in features based on what each sentence is compared against.
    foreach (var picked in nextSentences)
    {
        picked.prevSentenceClassification = firstSentence.classification;
        picked.adjacentSentenceClassification = adjacentClassifications;
    }

    return nextSentences;
}
/// <summary>
/// Creates sentence examples from the text file at the given path.
/// </summary>
/// <param name="content">Path of the text file to read (not the file's text).</param>
/// <returns>One <c>SentenceExample</c> per complete sentence found, in file order.</returns>
/// <remarks>
/// Each line is split on <c>_delimiterChars</c>. A sentence that is not terminated on
/// its line is carried over and prepended to the first sentence of the next non-empty
/// line. A fragment still pending when the file ends is not emitted.
/// </remarks>
private List<SentenceExample> ParseLines(string content)
{
    var output = new List<SentenceExample>();
    string[] lines = System.IO.File.ReadAllLines(content);
    string sentenceToCont = null;

    foreach (string line in lines)
    {
        // Drop empty AND whitespace-only fragments, so that trailing whitespace after
        // the final delimiter cannot turn into a bogus "." sentence.
        var sentences = line.Split(_delimiterChars)
                            .Where(fragment => !string.IsNullOrWhiteSpace(fragment))
                            .ToArray();
        if (sentences.Length == 0)
        {
            continue;
        }

        int endVal = sentences.Length;

        // If there is a leftover sentence from a previous line, add it onto the first sentence.
        if (!string.IsNullOrWhiteSpace(sentenceToCont))
        {
            sentences[0] = sentenceToCont.Trim() + " " + sentences[0];
            sentenceToCont = null;
        }

        // If the line does not end with punctuation, its last sentence continues on the
        // next line. The line is right-trimmed first so trailing whitespace does not
        // hide the closing punctuation.
        // NOTE(review): EndsWithPunctuation is defined outside this view - confirm it
        // tests the final character(s) of its argument.
        if (!EndsWithPunctuation(line.TrimEnd()))
        {
            sentenceToCont = sentences[sentences.Length - 1];
            endVal--;
        }

        for (int j = 0; j < endVal; j++)
        {
            // Trim both ends so stored sentences do not start with the space that
            // followed the previous delimiter.
            string text = sentences[j].Trim();

            // Skip empty tokens caused by repeated spaces.
            string[] words = text.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);

            var example = new SentenceExample();
            example.sentence = text + ".";
            example.wordIndexes.AddRange(UpdateBagOfWords(words));
            output.Add(example);
        }
    }

    return output;
}