/// <summary>
/// Rebuilds the aggregate phrase texts from the target phrase and then repeatedly merges
/// neighbouring phrase texts that are both known, joining them with a single space.
/// Each pass merges at most one pair (the first qualifying pair, scanning from the start of
/// the list) and the next pass starts over from the beginning. Merging stops as soon as a
/// full pass finds nothing to merge, or after <paramref name="maxIterations"/> passes.
/// </summary>
/// <param name="maxIterations">Upper bound on the number of merge passes over the list.</param>
private void AggregateAdjacentKnownPhraseTexts(int maxIterations)
{
    if (_AbortIsFlagged)
    {
        return;
    }

    // Start from a freshly populated list of aggregate phrase texts.
    PopulateAggregatePhraseTexts(AggregateSize);

    // A single phrase text has no neighbour to merge with.
    if (AggregatePhraseTexts.Count == 1)
    {
        return;
    }

    for (var pass = 0; pass < maxIterations; pass++)
    {
        if (_AbortIsFlagged)
        {
            return;
        }

        // Snapshot the last usable left-hand index before this pass mutates the list.
        var lastLeftIndex = AggregatePhraseTexts.Count - 2;
        var mergedAPair = false;

        for (var left = 0; left <= lastLeftIndex; left++)
        {
            if (_AbortIsFlagged)
            {
                return;
            }

            var right = left + 1;
            var leftText = AggregatePhraseTexts[left];
            var rightText = AggregatePhraseTexts[right];

            // Both neighbours must be known; the right one is only checked when the left one is.
            if (!IsPhraseKnown(leftText) || !IsPhraseKnown(rightText))
            {
                continue;
            }

            // Fold the right-hand text into the left-hand slot, then drop the right-hand slot.
            AggregatePhraseTexts[left] = leftText + " " + rightText;
            if (right < AggregatePhraseTexts.Count)
            {
                AggregatePhraseTexts.RemoveAt(right);
            }

            // The list changed, so end this pass and rescan from the start on the next one.
            mergedAPair = true;
            break;
        }

        // A full pass without a merge means no adjacent known pair is left.
        if (!mergedAPair)
        {
            break;
        }
    }

    // When everything collapsed into one phrase text, use the target's exact text for it.
    if (AggregatePhraseTexts.Count == 1)
    {
        AggregatePhraseTexts[0] = _Target.Phrase.Text;
    }
}
/// <summary>
/// Splits the target phrase's text into consecutive phrase texts of
/// <paramref name="aggregateSize"/> words each (the final one holds the remainder and may
/// have fewer words) and stores them, in order, in AggregatePhraseTexts, replacing whatever
/// the list held before. Publishes the "thinking"/"thinked" events around the work.
/// </summary>
/// <param name="aggregateSize">The number of words contained in each aggregate phrase text. Must be positive.</param>
/// <param name="skipAggregationIfLineIsKnown">
/// When true and the entire line text is already known, the line text is stored as the only
/// aggregate phrase text and no splitting is performed.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="aggregateSize"/> is zero or negative.
/// </exception>
private void PopulateAggregatePhraseTexts(int aggregateSize, bool skipAggregationIfLineIsKnown = true)
{
    #region Thinking (try..)
    var methodThinkId = Guid.NewGuid();
    History.Events.ThinkingAboutTargetEvent.Publish(methodThinkId);
    try
    {
    #endregion

        // Zero would divide by zero below; a negative size would make the aggregate count
        // non-positive and silently leave the list empty. Reject both up front.
        if (aggregateSize <= 0)
        {
            throw new ArgumentOutOfRangeException("aggregateSize");
        }

        AggregatePhraseTexts.Clear();

        var lineText = _Target.Phrase.Text;
        var lineIsKnown = IsPhraseKnown(lineText);
        if (skipAggregationIfLineIsKnown && lineIsKnown)
        {
            // The whole line is known and we are not forcing a split:
            // the line itself is the one and only aggregate phrase text.
            AggregatePhraseTexts.Add(lineText);
            return;
        }

        // Example: 20 words with aggregateSize 2 gives 10 phrases; with aggregateSize 7 it
        // gives 3 phrases of 7, 7 and 6 words. The count is wordCount / aggregateSize rounded up.
        // NOTE(review): for a line with no words this expression yields 0 when aggregateSize is 1
        // (list stays empty) and 1 otherwise (line text is added as-is); kept unchanged.
        var words = lineText.ParseIntoWords();
        var wordCount = words.Count;
        var aggregateCount = (wordCount - 1) / aggregateSize + 1;

        // A single aggregate means the entire line is the aggregate phrase text.
        if (aggregateCount == 1)
        {
            AggregatePhraseTexts.Add(lineText);
            return;
        }

        for (int i = 0; i < aggregateCount; i++)
        {
            var firstWordIndex = i * aggregateSize;
            var aggregatePhraseText = "";

            // Every phrase takes aggregateSize words except the last, which takes what is left.
            for (int j = 0; j < aggregateSize; j++)
            {
                var wordIndex = firstWordIndex + j;
                if (wordIndex >= wordCount)
                {
                    break; // reached the end of the last phrase
                }

                var currentWord = words[wordIndex];
                if (string.IsNullOrEmpty(aggregatePhraseText))
                {
                    aggregatePhraseText = currentWord;
                }
                else
                {
                    aggregatePhraseText += " " + currentWord;
                }
            }

            if (string.IsNullOrEmpty(aggregatePhraseText))
            {
                // This shouldn't happen, but if it does, log it and leave the empty phrase out.
                Services.Log(StudyResources.WarningMsgEmptyAggregatePhraseTextFound,
                             LogPriority.Medium,
                             LogCategory.Warning);
                continue;
            }

            // Append instead of Insert(i, ...): once an empty phrase has been skipped, i is
            // larger than Count and Insert would throw. When nothing is skipped, i equals
            // Count, so appending yields exactly the same ordering.
            AggregatePhraseTexts.Add(aggregatePhraseText);
        }

    #region (...finally) Thinked
    }
    finally
    {
        History.Events.ThinkedAboutTargetEvent.Publish(methodThinkId);
    }
    #endregion
}