/// <summary>
/// Verifies that calling AddOwningPhrase on a part constructed from a key term match
/// throws an <see cref="InvalidOperationException"/>.
/// </summary>
public void AddOwningPhrase_KeyTermPart_ThrowsInvalidOperationException()
{
    var keyTermMatch = new KeyTermMatchSurrogate("blah snerb", "bleh", "bloh");
    var keyTermPart = new ParsedPart(keyTermMatch);

    // Sanity check: the part really is a key term part before exercising the failure path.
    Assert.AreEqual(PartType.KeyTerm, keyTermPart.Type);

    Assert.Throws<InvalidOperationException>(
        () => keyTermPart.AddOwningPhrase(new Question()));
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets or creates a part matching the given sub-phrase and registers the given phrase
/// as one of its owners.
/// </summary>
/// <param name="words">The words of the sub-phrase. Must contain at least one word.</param>
/// <param name="owningPhraseOfPart">The owning phrase of the part to find or create.</param>
/// <returns>The existing part whose words match <paramref name="words"/>, or a newly
/// created part that has been added to the parts table.</returns>
/// ------------------------------------------------------------------------------------
private ParsedPart GetOrCreatePart(IEnumerable<Word> words, Question owningPhraseOfPart)
{
    Debug.Assert(words.Any());

    // "words" is only known to be an IEnumerable, so compute the two lookup keys once
    // instead of re-enumerating the sequence for every table access.
    int wordCount = words.Count();
    Word firstWord = words.First();

    // First level of the table is keyed by the number of words in the part...
    Dictionary<Word, List<ParsedPart>> partsTable;
    if (!m_partsTable.TryGetValue(wordCount, out partsTable))
    {
        m_partsTable[wordCount] = partsTable = new Dictionary<Word, List<ParsedPart>>();
    }

    // ...and the second level by the first word of the part.
    List<ParsedPart> parts;
    if (!partsTable.TryGetValue(firstWord, out parts))
    {
        partsTable[firstWord] = parts = new List<ParsedPart>();
    }

    // A freshly created list is empty, so this yields null and a new part gets created.
    ParsedPart part = parts.FirstOrDefault(x => x.Words.SequenceEqual(words));
    if (part == null)
    {
        part = new ParsedPart(words);
        parts.Add(part);
    }

    part.AddOwningPhrase(owningPhraseOfPart);
    return part;
}
/// <summary>
/// Verifies that setting Text on a default-constructed part makes both the text and
/// the corresponding words available.
/// </summary>
public void SetText_WordsNotPreviouslySet_SetsText()
{
    const string phrase = "cool beans";
    var part = new ParsedPart { Text = phrase };

    Assert.AreEqual(2, part.Words.Count);
    Assert.AreEqual(phrase, part.Text);
}
/// <summary>
/// Verifies that setting Words on a default-constructed part makes both the words and
/// the corresponding text available.
/// </summary>
public void SetWords_TextNotPreviouslySet_SetsWords()
{
    Word[] coolBeans = { "cool", "beans" };
    var part = new ParsedPart();

    part.Words = new List<Word>(coolBeans);

    Assert.AreEqual(2, part.Words.Count);
    Assert.AreEqual("cool beans", part.Text);
}
/// <summary>
/// Verifies that constructing a part from an array of words yields a translatable part
/// exposing those words and their joined text.
/// </summary>
public void Construct_FromWords_CreatesTranslatablePart()
{
    Word[] coolBeans = { "cool", "beans" };

    var part = new ParsedPart(coolBeans);

    Assert.AreEqual(PartType.TranslatablePart, part.Type);
    Assert.AreEqual(2, part.Words.Count);
    Assert.AreEqual("cool beans", part.Text);
}
/// <summary>
/// Verifies that assigning Words a second time, after the part already has its
/// words and text, throws an <see cref="InvalidOperationException"/>.
/// </summary>
public void SetWords_TextAlreadySet_ThrowsInvalidOperationException()
{
    Word[] initialWords = { "cool", "beans" };
    var part = new ParsedPart();
    part.Words = new List<Word>(initialWords);
    Assert.AreEqual("cool beans", part.Text);

    // The replacement list is built inside the delegate, exactly where the assignment happens.
    Assert.Throws<InvalidOperationException>(
        () => part.Words = new List<Word>(new Word[] { "flog", "legs" }));
}
/// <summary>
/// Verifies that the first call to AddOwningPhrase on a translatable part results in
/// exactly one owner: the question that was added.
/// </summary>
public void AddOwningPhrase_NullList_CreatesNewList()
{
    Word[] coolBeans = { "cool", "beans" };
    var part = new ParsedPart(coolBeans);
    Assert.AreEqual(PartType.TranslatablePart, part.Type);

    var owner = new Question { Text = "Why?" };
    part.AddOwningPhrase(owner);

    Assert.AreEqual("Why?", part.Owners.Single().Text);
}
/// <summary>
/// Verifies that constructing a part from a key term match surrogate yields a key term
/// part whose text and individual words come from the surrogate's first argument.
/// </summary>
public void Construct_FromKeyTermMatchSurrogate_CreatesKeyTermPart()
{
    var part = new ParsedPart(new KeyTermMatchSurrogate("blah snerb", "bleh", "bloh"));

    Assert.AreEqual(PartType.KeyTerm, part.Type);
    Assert.AreEqual("blah snerb", part.Text);

    // The word count only needs checking once (the original asserted it twice).
    Assert.AreEqual(2, part.Words.Count);
    Assert.AreEqual("blah", part.Words[0].Text);
    Assert.AreEqual("snerb", part.Words[1].Text);
}
/// <summary>
/// Verifies that a second call to AddOwningPhrase appends the new owner after the
/// first one rather than replacing it.
/// </summary>
public void AddOwningPhrase_ExistingList_AddsToList()
{
    Word[] singleWord = { "beans" };
    var part = new ParsedPart(singleWord);

    var firstOwner = new Question { Text = "Why?" };
    part.AddOwningPhrase(firstOwner);

    var secondOwner = new Question { Text = "When?" };
    part.AddOwningPhrase(secondOwner);

    // Owners are expected in the order in which they were added.
    Assert.AreEqual("Why?", part.Owners.First().Text);
    Assert.AreEqual("When?", part.Owners.Skip(1).First().Text);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Finds the longest known part that is a proper sub-phrase of the specified part.
/// Candidates are tried from the longest possible length (one word fewer than the part)
/// down to a single word and, for each length, from the leftmost start position.
/// </summary>
/// <param name="part">The part in which to look for a sub-phrase.</param>
/// <returns>The first match found (start index within <paramref name="part"/> plus the
/// matching part), or <c>null</c> if the part's text is one of the question words or no
/// sub-phrase matches.</returns>
/// ------------------------------------------------------------------------------------
private SubPhraseMatch FindSubPhraseMatch(ParsedPart part)
{
    if (m_questionWords != null && m_questionWords.Contains(part.Text))
    {
        return null;
    }

    int totalWords = part.Words.Count;
    for (int candidateLength = totalWords - 1; candidateLength > 0; candidateLength--)
    {
        Dictionary<Word, List<ParsedPart>> candidatesByFirstWord;
        if (!m_partsTable.TryGetValue(candidateLength, out candidatesByFirstWord))
        {
            continue;
        }

        // Beyond this start position there aren't enough words left in the part for a
        // candidate of the current length to fit.
        int lastStart = totalWords - candidateLength;
        for (int start = 0; start <= lastStart; start++)
        {
            Word firstWord = part.Words[start];

            // Don't want to split a phrase using a part that consists of a single
            // preposition or article.
            // NOTE(review): this abandons the scan for all remaining single-word
            // candidates, not just this word - confirm that "break" (rather than
            // "continue") is intended.
            if (candidateLength == 1 && prepositionsAndArticles.Contains(firstWord))
            {
                break;
            }

            List<ParsedPart> candidates;
            if (!candidatesByFirstWord.TryGetValue(firstWord, out candidates))
            {
                continue;
            }

            foreach (ParsedPart candidate in candidates)
            {
                // The first word already matched via the table key; compare the rest.
                int candidateWordCount = candidate.Words.Count;
                int matched = 1;
                while (matched < candidateWordCount &&
                    candidate.Words[matched] == part.Words[start + matched])
                {
                    matched++;
                }

                if (matched == candidateWordCount)
                {
                    return new SubPhraseMatch(start, candidate);
                }
            }
        }
    }

    return null;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Performs the parsing logic to divide question text into translatable parts and key
/// term parts. After every question has been parsed, the parts are revisited from the
/// longest to the shortest; any part that does not occur too often and that contains
/// another known part as a sub-phrase is split around that sub-phrase in each of its
/// owning questions.
/// </summary>
/// <exception cref="InvalidOperationException">The parts table is already populated
/// when this method is called.</exception>
/// ------------------------------------------------------------------------------------
private void Parse()
{
    if (m_partsTable.Any())
    {
        throw new InvalidOperationException("Parse called more than once.");
    }

    foreach (Question question in GetQuestions())
    {
        ParseQuestion(question);
    }

    // Nothing was added to the table, so there is nothing to split (and Keys.Max()
    // would throw on an empty sequence).
    if (!m_partsTable.Any())
    {
        return;
    }

    for (int wordCount = m_partsTable.Keys.Max(); wordCount > 0; wordCount--)
    {
        Dictionary<Word, List<ParsedPart>> partsTable;
        if (!m_partsTable.TryGetValue(wordCount, out partsTable))
        {
            continue;
        }

        // Threshold is (26 - 2^wordCount) / 2 with a floor of 2, i.e. 12, 11, 9, 5 for
        // 1-4 words and 2 for anything longer. The original expression used C#'s "^",
        // which is XOR (and binds looser than "-"), not exponentiation. The exponent is
        // clamped to 5 because the floor already applies there and a large shift count
        // would wrap around.
        int maxAllowableOccurrencesForSplitting =
            Math.Max(2, (26 - (1 << Math.Min(wordCount, 5))) / 2);

        List<ParsedPart> partsToDelete = new List<ParsedPart>();

        // REVIEW: problem: won't be able to add a new part that starts with this word - Is this really a problem?
        foreach (KeyValuePair<Word, List<ParsedPart>> phrasePartPair in partsTable)
        {
            foreach (ParsedPart part in phrasePartPair.Value)
            {
                int numberOfOccurrencesOfPart = part.Owners.Count();
                if (numberOfOccurrencesOfPart > maxAllowableOccurrencesForSplitting)
                {
                    continue;
                }

                // Look to see if some other part is a sub-phrase of this part.
                // Should an uncommon match be able to break a common one? If not, should
                // we keep looking for a better sub-phrase match?
                // NEEDS WORK: possibly also require
                // part.Owners.Count() < match.Part.Owners.Count() * 2
                SubPhraseMatch match = FindSubPhraseMatch(part);
                if (match == null)
                {
                    continue;
                }

                int matchEndIndex = match.StartIndex + match.Part.Words.Count;

                foreach (var owningPhraseOfPart in part.Owners)
                {
                    int iPart = owningPhraseOfPart.ParsedParts.IndexOf(part);

                    // Deal with any preceding remainder: the words before the match
                    // become their own part, inserted ahead of the original part.
                    if (match.StartIndex > 0)
                    {
                        ParsedPart preceedingPart = GetOrCreatePart(
                            part.GetSubWords(0, match.StartIndex), owningPhraseOfPart);
                        owningPhraseOfPart.ParsedParts.Insert(iPart++, preceedingPart);
                    }

                    // The matched sub-phrase takes over the slot held by the original part.
                    match.Part.AddOwningPhrase(owningPhraseOfPart);
                    owningPhraseOfPart.ParsedParts[iPart++] = match.Part;

                    // Deal with any following remainder: the words after the match
                    // become their own part, inserted right after the matched part.
                    if (matchEndIndex < part.Words.Count)
                    {
                        ParsedPart followingPart = GetOrCreatePart(
                            part.GetSubWords(matchEndIndex), owningPhraseOfPart);
                        owningPhraseOfPart.ParsedParts.Insert(iPart, followingPart);
                    }

                    // May be queued once per owner; removing an already-removed part
                    // below is a harmless no-op.
                    partsToDelete.Add(part);
                }
            }
        }

        // Removal is deferred until here because the lists in partsTable cannot be
        // modified while they are being enumerated above.
        foreach (ParsedPart partToDelete in partsToDelete)
        {
            partsTable[partToDelete.Words[0]].Remove(partToDelete);
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="SubPhraseMatch"/> class.
/// </summary>
/// <param name="startIndex">The value to store in <see cref="StartIndex"/>.</param>
/// <param name="part">The value to store in <see cref="Part"/>.</param>
public SubPhraseMatch(int startIndex, ParsedPart part)
{
    this.StartIndex = startIndex;
    this.Part = part;
}