/// ------------------------------------------------------------------------------------
/// <summary>
/// Performs the parsing logic to divide question text into translatable parts and key
/// term parts. Every question returned by GetQuestions is parsed first; then, working
/// from the longest parts down to single-word parts, any sufficiently uncommon part
/// that contains another known part as a sub-phrase is split around that sub-phrase in
/// each question that owns it.
/// </summary>
/// <exception cref="InvalidOperationException">The parts table already contains entries
/// (i.e., this method was called more than once).</exception>
/// ------------------------------------------------------------------------------------
private void Parse()
{
	if (m_partsTable.Any())
	{
		throw new InvalidOperationException("Parse called more than once.");
	}

	foreach (Question question in GetQuestions())
	{
		ParseQuestion(question);
	}

	// Nothing was parsed into parts (e.g., there were no questions), so there is nothing
	// to split. Without this guard, Max() would throw on the empty key collection.
	if (!m_partsTable.Any())
	{
		return;
	}

	for (int wordCount = m_partsTable.Keys.Max(); wordCount > 0; wordCount--)
	{
		Dictionary<Word, List<ParsedPart>> partsTable;
		if (!m_partsTable.TryGetValue(wordCount, out partsTable))
		{
			continue;
		}

		// The longer a part is, the fewer occurrences it may have and still be a candidate
		// for splitting: (26 - 2^wordCount) / 2, but never less than 2. In C#, '^' is XOR
		// (and binds more loosely than '-'), so the power of two is computed with a shift.
		// 2^5 already exceeds 26, so for five or more words the minimum always applies;
		// short-circuiting there also keeps the shift from wrapping for huge word counts.
		int maxAllowableOccurrencesForSplitting = wordCount >= 5 ?
			2 : Math.Max(2, (26 - (1 << wordCount)) / 2);

		// Removal is deferred until after the enumeration so that the lists being iterated
		// are not modified while they are being enumerated.
		List<ParsedPart> partsToDelete = new List<ParsedPart>();

		// REVIEW: problem: won't be able to add a new part that starts with this word - Is this really a problem?
		foreach (KeyValuePair<Word, List<ParsedPart>> phrasePartPair in partsTable)
		{
			foreach (ParsedPart part in phrasePartPair.Value)
			{
				int numberOfOccurrencesOfPart = part.Owners.Count();
				if (numberOfOccurrencesOfPart > maxAllowableOccurrencesForSplitting)
				{
					continue;
				}

				// Look to see if some other part is a sub-phrase of this part.
				SubPhraseMatch match = FindSubPhraseMatch(part);
				// Should an uncommon match be able to break a common one? If not, should we
				// keep looking for a better sub-phrase match?
				if (match == null /* || NEEDS WORK: part.Owners.Count() >= match.Part.Owners.Count() * 2 */)
				{
					continue;
				}

				int indexFollowingMatch = match.StartIndex + match.Part.Words.Count;
				bool replacedInSomeOwner = false;

				foreach (var owningPhraseOfPart in part.Owners)
				{
					int iPart = owningPhraseOfPart.ParsedParts.IndexOf(part);

					// Deal with any preceding remainder: the words before the match become
					// their own part, inserted ahead of the part being replaced.
					if (match.StartIndex > 0)
					{
						ParsedPart precedingPart = GetOrCreatePart(
							part.GetSubWords(0, match.StartIndex), owningPhraseOfPart);
						owningPhraseOfPart.ParsedParts.Insert(iPart++, precedingPart);
					}

					// The matched sub-phrase part takes the place of the original part.
					match.Part.AddOwningPhrase(owningPhraseOfPart);
					owningPhraseOfPart.ParsedParts[iPart++] = match.Part;

					// Deal with any following remainder: the words after the match become
					// their own part, inserted right after the matched part.
					if (indexFollowingMatch < part.Words.Count)
					{
						ParsedPart followingPart = GetOrCreatePart(
							part.GetSubWords(indexFollowingMatch), owningPhraseOfPart);
						owningPhraseOfPart.ParsedParts.Insert(iPart, followingPart);
					}

					replacedInSomeOwner = true;
				}

				// Queue the part for deletion once, no matter how many owners it had.
				if (replacedInSomeOwner)
				{
					partsToDelete.Add(part);
				}
			}
		}

		foreach (ParsedPart partToDelete in partsToDelete)
		{
			partsTable[partToDelete.Words[0]].Remove(partToDelete);
		}
	}
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="PhraseTranslationHelper"/> class. Builds
/// the key terms table and the phrase substitutions, parses every non-excluded phrase
/// into parts, and then splits any multi-word part that is owned by exactly one phrase
/// and contains another known part as a sub-phrase.
/// </summary>
/// <param name="phrases">The translatable phrases. Phrases whose PhraseToDisplayInUI is
/// null or empty are skipped entirely; excluded phrases are kept but not parsed.</param>
/// <param name="keyTerms">The key terms used to populate the key terms table.</param>
/// <param name="keyTermRules">Rules passed along when populating the key terms table.
/// </param>
/// <param name="phrasesToIgnore">Substitutions whose regular expressions and replacement
/// strings are handed to the phrase parser.</param>
/// ------------------------------------------------------------------------------------
public PhraseTranslationHelper(IEnumerable<TranslatablePhrase> phrases,
	IEnumerable<IKeyTerm> keyTerms, KeyTermRules keyTermRules,
	IEnumerable<Substitution> phrasesToIgnore)
{
	TranslatablePhrase.s_helper = this;

	m_keyTermsTable = new Dictionary<Word, List<KeyTermMatch>>(keyTerms.Count());
	PopulateKeyTermsTable(keyTerms, keyTermRules);

	m_phraseSubstitutions = new Dictionary<Regex, string>(phrasesToIgnore.Count());
	foreach (Substitution substitutePhrase in phrasesToIgnore)
	{
		m_phraseSubstitutions[substitutePhrase.RegEx] = substitutePhrase.RegExReplacementString;
	}

	m_partsTable = new SortedDictionary<int, Dictionary<Word, List<Part>>>();
	foreach (TranslatablePhrase phrase in phrases.Where(p => !string.IsNullOrEmpty(p.PhraseToDisplayInUI)))
	{
		if (!phrase.IsExcluded)
		{
			PhraseParser parser = new PhraseParser(m_keyTermsTable, m_phraseSubstitutions,
				phrase, GetOrCreatePart);
			foreach (IPhrasePart part in parser.Parse())
			{
				phrase.m_parts.Add(part);
			}
		}

		m_phrases.Add(phrase);
		// NOTE(review): a Category of -1 presumably marks a category-name phrase - confirm.
		if (phrase.Category == -1)
		{
			m_categories[phrase.SequenceNumber] = phrase;
		}
	}

	// When no phrase yielded any parts (no phrases at all, or all of them excluded), the
	// parts table is empty and Max() would throw; starting at 0 simply skips the loop.
	int maxWordCount = m_partsTable.Count > 0 ? m_partsTable.Keys.Max() : 0;

	for (int wordCount = maxWordCount; wordCount > 1; wordCount--)
	{
		Dictionary<Word, List<Part>> partsTable;
		if (!m_partsTable.TryGetValue(wordCount, out partsTable))
		{
			continue;
		}

		// Removal is deferred until after the enumeration so that the lists being iterated
		// are not modified while they are being enumerated.
		List<Part> partsToDelete = new List<Part>();

		// REVIEW: problem: won't be able to add a new part that starts with this word
		foreach (KeyValuePair<Word, List<Part>> phrasePartPair in partsTable)
		{
			foreach (Part part in phrasePartPair.Value)
			{
				// Only parts owned by exactly one phrase are candidates for splitting.
				if (part.OwningPhrases.Count() != 1)
				{
					continue;
				}

				// Look to see if some other part is a sub-phrase of this part.
				SubPhraseMatch match = FindSubPhraseMatch(part);
				if (match == null)
				{
					continue;
				}

				TranslatablePhrase owningPhraseOfPart = part.OwningPhrases.First();
				int iPart = owningPhraseOfPart.m_parts.IndexOf(part);
				int indexFollowingMatch = match.StartIndex + match.Part.m_words.Count;

				// Deal with any preceding remainder: the words before the match become
				// their own part, inserted ahead of the part being replaced.
				if (match.StartIndex > 0)
				{
					Part precedingPart = GetOrCreatePart(part.GetSubWords(0, match.StartIndex),
						owningPhraseOfPart, wordCount);
					owningPhraseOfPart.m_parts.Insert(iPart++, precedingPart);
				}

				// The matched sub-phrase part takes the place of the original part.
				match.Part.AddOwningPhrase(owningPhraseOfPart);
				owningPhraseOfPart.m_parts[iPart++] = match.Part;

				// Deal with any following remainder: the words after the match become
				// their own part, inserted right after the matched part.
				if (indexFollowingMatch < part.m_words.Count)
				{
					Part followingPart = GetOrCreatePart(part.GetSubWords(indexFollowingMatch),
						owningPhraseOfPart, wordCount);
					owningPhraseOfPart.m_parts.Insert(iPart, followingPart);
				}

				partsToDelete.Add(part);
			}
		}

		foreach (Part partToDelete in partsToDelete)
		{
			partsTable[partToDelete.m_words[0]].Remove(partToDelete);
		}
	}

	m_filteredPhrases = m_phrases;
}