Exemplo n.º 1
0
        public void AddOwningPhrase_KeyTermPart_ThrowsInvalidOperationException()
        {
            // Arrange: build a part from a key-term surrogate and confirm it is typed as a key term.
            var surrogate   = new KeyTermMatchSurrogate("blah snerb", "bleh", "bloh");
            var keyTermPart = new ParsedPart(surrogate);
            Assert.AreEqual(PartType.KeyTerm, keyTermPart.Type);

            // Act + Assert: registering an owning phrase on that part must be rejected.
            Assert.Throws <InvalidOperationException>(
                () => keyTermPart.AddOwningPhrase(new Question()));
        }
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Gets or creates a part matching the given sub-phrase and registers the given phrase
        /// as one of its owners. Parts are looked up in m_partsTable, which is keyed first by
        /// word count and then by the first word of the part.
        /// </summary>
        /// <param name="words">The words of the sub-phrase (expected to be non-empty). The
        /// sequence is materialized once up front so that a lazily-evaluated sequence is not
        /// re-run for every lookup.</param>
        /// <param name="owningPhraseOfPart">The owning phrase of the part to find or create.</param>
        /// <returns>The existing part whose words equal <paramref name="words"/>, or a newly
        /// created (and registered) part if none existed.</returns>
        /// ------------------------------------------------------------------------------------
        private ParsedPart GetOrCreatePart(IEnumerable <Word> words, Question owningPhraseOfPart)
        {
            // Snapshot the words: the original sequence would otherwise be enumerated by every
            // Count()/First()/SequenceEqual() call below.
            IList <Word> wordList = words as IList <Word> ?? words.ToList();
            Debug.Assert(wordList.Count > 0);

            // First level: table of all parts having this number of words.
            Dictionary <Word, List <ParsedPart> > partsTable;
            if (!m_partsTable.TryGetValue(wordList.Count, out partsTable))
            {
                m_partsTable[wordList.Count] = partsTable = new Dictionary <Word, List <ParsedPart> >();
            }

            // Second level: all parts of that length starting with the same first word.
            Word firstWord = wordList.First();
            List <ParsedPart> parts;
            if (!partsTable.TryGetValue(firstWord, out parts) || parts == null)
            {
                partsTable[firstWord] = parts = new List <ParsedPart>();
            }

            ParsedPart part = parts.FirstOrDefault(x => x.Words.SequenceEqual(wordList));
            if (part == null)
            {
                part = new ParsedPart(wordList);
                parts.Add(part);
            }

            part.AddOwningPhrase(owningPhraseOfPart);

            return part;
        }
Exemplo n.º 3
0
        public void SetText_WordsNotPreviouslySet_SetsText()
        {
            // Default-constructed part; Text is the only thing assigned.
            var textOnlyPart = new ParsedPart { Text = "cool beans" };

            // Both the word list and the text must reflect the assigned string.
            Assert.AreEqual(2, textOnlyPart.Words.Count);
            Assert.AreEqual("cool beans", textOnlyPart.Text);
        }
Exemplo n.º 4
0
        public void SetWords_TextNotPreviouslySet_SetsWords()
        {
            // Default-constructed part; Words is the only thing assigned.
            var wordsOnlyPart = new ParsedPart
            {
                Words = new List <Word> { "cool", "beans" }
            };

            // Both the word list and the text must reflect the assigned words.
            Assert.AreEqual(2, wordsOnlyPart.Words.Count);
            Assert.AreEqual("cool beans", wordsOnlyPart.Text);
        }
Exemplo n.º 5
0
        public void Construct_FromWords_CreatesTranslatablePart()
        {
            // Arrange + Act: construct directly from a word array.
            Word[] sourceWords      = { "cool", "beans" };
            var    translatablePart = new ParsedPart(sourceWords);

            // Assert: the part is translatable and exposes the same words/text.
            Assert.AreEqual(PartType.TranslatablePart, translatablePart.Type);
            Assert.AreEqual(2, translatablePart.Words.Count);
            Assert.AreEqual("cool beans", translatablePart.Text);
        }
Exemplo n.º 6
0
        public void SetWords_TextAlreadySet_ThrowsInvalidOperationException()
        {
            // Arrange: the first assignment of Words also establishes the part's text.
            var populatedPart = new ParsedPart
            {
                Words = new List <Word> { "cool", "beans" }
            };
            Assert.AreEqual("cool beans", populatedPart.Text);

            // Act + Assert: a second assignment of Words must be rejected.
            Assert.Throws <InvalidOperationException>(
                () => populatedPart.Words = new List <Word> { "flog", "legs" });
        }
Exemplo n.º 7
0
        public void AddOwningPhrase_NullList_CreatesNewList()
        {
            // Arrange: a translatable part that has not had any owner added yet.
            var ownerlessPart = new ParsedPart(new Word[] { "cool", "beans" });
            Assert.AreEqual(PartType.TranslatablePart, ownerlessPart.Type);

            var owner = new Question { Text = "Why?" };

            // Act
            ownerlessPart.AddOwningPhrase(owner);

            // Assert: exactly one owner is now reported, and it is the one just added.
            Assert.AreEqual("Why?", ownerlessPart.Owners.Single().Text);
        }
Exemplo n.º 8
0
        public void Construct_FromKeyTermMatchSurrogate_CreatesKeyTermPart()
        {
            // Arrange + Act: construct from a surrogate whose term text has two words.
            var part = new ParsedPart(new KeyTermMatchSurrogate("blah snerb", "bleh", "bloh"));

            // Assert: the part is a key term carrying the surrogate's text...
            Assert.AreEqual(PartType.KeyTerm, part.Type);
            Assert.AreEqual("blah snerb", part.Text);

            // ...split into its individual words (word count is checked once; the original
            // repeated this assertion verbatim).
            Assert.AreEqual(2, part.Words.Count);
            Assert.AreEqual("blah", part.Words[0].Text);
            Assert.AreEqual("snerb", part.Words[1].Text);
        }
Exemplo n.º 9
0
        public void AddOwningPhrase_ExistingList_AddsToList()
        {
            var singleWordPart = new ParsedPart(new Word[] { "beans" });

            // The first owner is added to a part that has no owners yet.
            var firstOwner = new Question { Text = "Why?" };
            singleWordPart.AddOwningPhrase(firstOwner);

            // The second owner is added once an owner is already registered.
            var secondOwner = new Question { Text = "When?" };
            singleWordPart.AddOwningPhrase(secondOwner);

            // Owners are reported in the order they were added.
            var owners = singleWordPart.Owners.ToList();
            Assert.AreEqual("Why?", owners[0].Text);
            Assert.AreEqual("When?", owners[1].Text);
        }
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Searches m_partsTable for a shorter part whose words occur contiguously inside the
        /// specified part. Longer candidates are tried before shorter ones, and for a given
        /// candidate length the start positions are scanned left to right; the first hit wins.
        /// </summary>
        /// <param name="part">The part to search within.</param>
        /// <returns>The start index and matching part of the first match found, or null when
        /// the part's text is contained in m_questionWords or no match exists.</returns>
        /// ------------------------------------------------------------------------------------
        private SubPhraseMatch FindSubPhraseMatch(ParsedPart part)
        {
            bool isQuestionWord = m_questionWords != null && m_questionWords.Contains(part.Text);
            if (isQuestionWord)
            {
                return null;
            }

            int totalWords = part.Words.Count;

            for (int candidateLength = totalWords - 1; candidateLength >= 1; candidateLength--)
            {
                Dictionary <Word, List <ParsedPart> > candidatesByFirstWord;
                if (!m_partsTable.TryGetValue(candidateLength, out candidatesByFirstWord))
                {
                    continue;
                }

                // Only start positions that leave room for candidateLength words are considered.
                int lastStart = totalWords - candidateLength;
                for (int start = 0; start <= lastStart; start++)
                {
                    Word firstWord = part.Words[start];

                    // Don't want to split a phrase using a part that consists of a single
                    // preposition or article.
                    // NOTE(review): this stops scanning ALL remaining positions for one-word
                    // candidates, not just this one - confirm 'break' (vs. 'continue') is intended.
                    if (candidateLength == 1 && prepositionsAndArticles.Contains(firstWord))
                    {
                        break;
                    }

                    List <ParsedPart> candidates;
                    if (!candidatesByFirstWord.TryGetValue(firstWord, out candidates))
                    {
                        continue;
                    }

                    foreach (ParsedPart candidate in candidates)
                    {
                        // The first word already matched via the table key; compare the rest.
                        int candidateWordCount = candidate.Words.Count;
                        int matched            = 1;
                        while (matched < candidateWordCount &&
                               candidate.Words[matched] == part.Words[start + matched])
                        {
                            matched++;
                        }

                        if (matched == candidateWordCount)
                        {
                            return new SubPhraseMatch(start, candidate);
                        }
                    }
                }
            }

            return null;
        }
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Performs the parsing logic to divide question text into translatable parts and key
        /// term parts. After every question has been parsed, the parts table is walked from the
        /// longest parts down to the shortest; any part that contains another known part as a
        /// sub-phrase (see FindSubPhraseMatch) is split in each owning question into an optional
        /// preceding part, the matched part and an optional following part, and is then removed
        /// from the table.
        /// </summary>
        /// <exception cref="InvalidOperationException">The parts table is already populated,
        /// i.e. Parse was called more than once.</exception>
        /// ------------------------------------------------------------------------------------
        private void Parse()
        {
            if (m_partsTable.Any())
            {
                throw new InvalidOperationException("Parse called more than once.");
            }

            foreach (Question question in GetQuestions())
            {
                ParseQuestion(question);
            }

            // If parsing produced no parts there is nothing to split, and Keys.Max() below
            // would throw on the empty key collection.
            if (!m_partsTable.Any())
            {
                return;
            }

            for (int wordCount = m_partsTable.Keys.Max(); wordCount > 0; wordCount--)
            {
                Dictionary <Word, List <ParsedPart> > partsTable;
                if (!m_partsTable.TryGetValue(wordCount, out partsTable))
                {
                    continue;
                }

                // Threshold shrinks exponentially with part length: max(2, (26 - 2^wordCount) / 2).
                // In C# '^' is XOR (and binds looser than '-'), so the power of two is computed
                // with a shift. The shift is capped at 5 because 2^5 already exceeds 26 (the
                // result is clamped to 2 anyway) and an int shift would wrap for large counts.
                int twoToTheWordCount = 1 << Math.Min(wordCount, 5);
                int maxAllowableOccurrencesForSplitting = Math.Max(2, (26 - twoToTheWordCount) / 2);

                List <ParsedPart> partsToDelete = new List <ParsedPart>();

                // REVIEW: problem: won't be able to add a new part that starts with this word - Is this really a problem?
                foreach (KeyValuePair <Word, List <ParsedPart> > phrasePartPair in partsTable)
                {
                    foreach (ParsedPart part in phrasePartPair.Value)
                    {
                        // Parts that occur in many questions are left whole.
                        int numberOfOccurrencesOfPart = part.Owners.Count();
                        if (numberOfOccurrencesOfPart > maxAllowableOccurrencesForSplitting)
                        {
                            continue;
                        }

                        // Look to see if some other part is a sub-phrase of this part.
                        SubPhraseMatch match = FindSubPhraseMatch(part);
                        // Should an uncommon match be able to break a common one? If not, should we keep looking for a better sub-phrase match?
                        if (match != null /* && NEEDS WORK: part.Owners.Count() < match.Part.Owners.Count() * 2*/)
                        {
                            foreach (var owningPhraseOfPart in part.Owners)
                            {
                                int iPart = owningPhraseOfPart.ParsedParts.IndexOf(part);
                                // Deal with any preceding remainder: the words before the match
                                // become their own part, inserted ahead of the slot being replaced.
                                if (match.StartIndex > 0)
                                {
                                    ParsedPart preceedingPart = GetOrCreatePart(part.GetSubWords(0, match.StartIndex),
                                                                                owningPhraseOfPart);
                                    owningPhraseOfPart.ParsedParts.Insert(iPart++, preceedingPart);
                                }
                                // The slot that held the original part now holds the matched sub-phrase.
                                match.Part.AddOwningPhrase(owningPhraseOfPart);
                                owningPhraseOfPart.ParsedParts[iPart++] = match.Part;
                                // Deal with any following remainder: the words after the match
                                // become their own part, inserted right after the matched part.
                                if (match.StartIndex + match.Part.Words.Count < part.Words.Count)
                                {
                                    ParsedPart followingPart =
                                        GetOrCreatePart(part.GetSubWords(match.StartIndex + match.Part.Words.Count),
                                                        owningPhraseOfPart);
                                    owningPhraseOfPart.ParsedParts.Insert(iPart, followingPart);
                                }
                                // May be added once per owner; the repeated Remove calls below
                                // simply find nothing to remove.
                                partsToDelete.Add(part);
                            }
                        }
                    }
                }

                // Removal is deferred until here so the lists being enumerated above are not modified.
                foreach (ParsedPart partToDelete in partsToDelete)
                {
                    partsTable[partToDelete.Words[0]].Remove(partToDelete);
                }
            }
        }
 /// <summary>
 /// Creates a match record pairing a start index with the part that was matched.
 /// </summary>
 /// <param name="startIndex">Value stored in <c>StartIndex</c>.</param>
 /// <param name="part">Value stored in <c>Part</c>.</param>
 public SubPhraseMatch(int startIndex, ParsedPart part)
 {
     this.StartIndex = startIndex;
     this.Part = part;
 }