/// ------------------------------------------------------------------------------------
/// <summary>
/// Pairs up the section head proxies of the Revision and the Current into correlation
/// clusters, using the possible correlations (overlaps) recorded on each proxy. A
/// proxy is placed in exactly one cluster, so when a proxy has several possible
/// partners, the first one in its overlap list that has not been used yet is taken.
/// A proxy without an available partner gets a one-sided cluster of its own.
/// The finished list replaces m_clusterList.
/// </summary>
/// ------------------------------------------------------------------------------------
private void DetermineCorrelatedSectionHeadClusters()
{
    List<Cluster> correlationClusters = new List<Cluster>();

    // Work on throw-away copies of the master lists, so that proxies can be struck
    // off (in any order) as soon as they have been placed in a cluster.
    List<OverlapInfo> remainingRev = new List<OverlapInfo>(m_proxyListRev.ToArray());
    List<OverlapInfo> remainingCurr = new List<OverlapInfo>(m_proxyListCurr.ToArray());

    // Every pass removes at least one proxy, so this loop terminates.
    while (remainingRev.Count > 0 || remainingCurr.Count > 0)
    {
        // The two members of the cluster being built; either may stay null.
        OverlapInfo revMember = null;
        OverlapInfo currMember = null;

        if (remainingRev.Count == 0)
        {
            // Only Current proxies are left
            currMember = remainingCurr[0];
        }
        else if (remainingCurr.Count == 0)
        {
            // Only Revision proxies are left
            revMember = remainingRev[0];
        }
        else
        {
            // Both sides have proxies left: start from whichever head-of-list proxy
            // has the earlier start reference (on a tie the Current one is used) and
            // look for its partner among the unused proxies on the other side.
            OverlapInfo firstRev = remainingRev[0];
            OverlapInfo firstCurr = remainingCurr[0];
            if (firstRev.verseRefMin < firstCurr.verseRefMin)
            {
                revMember = firstRev;
                currMember = FindFirstRemainingOverlap(firstRev, remainingCurr);
            }
            else
            {
                currMember = firstCurr;
                revMember = FindFirstRemainingOverlap(firstCurr, remainingRev);
            }
        }

        // Move the chosen proxies out of the work lists and into a new cluster
        Cluster pairCluster = new Cluster();
        if (revMember != null)
        {
            pairCluster.itemsRev.Add(revMember);
            remainingRev.Remove(revMember);
        }
        if (currMember != null)
        {
            pairCluster.itemsCurr.Add(currMember);
            remainingCurr.Remove(currMember);
        }

        // The cluster's reference range spans all of its members
        if (revMember != null && currMember != null)
        {
            pairCluster.verseRefMin = Math.Min(revMember.verseRefMin, currMember.verseRefMin);
            pairCluster.verseRefMax = Math.Max(revMember.verseRefMax, currMember.verseRefMax);
        }
        else if (revMember != null)
        {
            pairCluster.verseRefMin = revMember.verseRefMin;
            pairCluster.verseRefMax = revMember.verseRefMax;
        }
        else if (currMember != null)
        {
            pairCluster.verseRefMin = currMember.verseRefMin;
            pairCluster.verseRefMax = currMember.verseRefMax;
        }

        correlationClusters.Add(pairCluster);
    }

    // Hand off the finished list
    m_clusterList = correlationClusters;
}

/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets the first item in the given proxy's overlap list that is still present in the
/// given list of remaining (unused) proxies.
/// </summary>
/// <param name="proxy">The proxy whose possible partners are examined.</param>
/// <param name="remainingInOther">The unused proxies of the other side.</param>
/// <returns>the first unused partner, or null if there is none</returns>
/// ------------------------------------------------------------------------------------
private static OverlapInfo FindFirstRemainingOverlap(OverlapInfo proxy,
    List<OverlapInfo> remainingInOther)
{
    foreach (OverlapInfo candidate in proxy.overlappedItemsInOther)
    {
        if (remainingInOther.Contains(candidate))
            return candidate;
    }
    return null;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Files the given overlap info under the Revision or Current items of the cluster
/// (depending on which book it came from) and widens the cluster's verse reference
/// range so that it includes the item.
/// </summary>
/// <param name="cluster">The cluster that receives the item.</param>
/// <param name="oi">The overlap info to add.</param>
/// ------------------------------------------------------------------------------------
private void AddItemToCluster(Cluster cluster, OverlapInfo oi)
{
    if (!oi.bookIsFromRev)
        cluster.itemsCurr.Add(oi);
    else
        cluster.itemsRev.Add(oi);

    // A minimum of 0 is treated as "not set yet" and is always overwritten
    bool minNotSetYet = cluster.verseRefMin == 0;
    if (minNotSetYet || oi.verseRefMin < cluster.verseRefMin)
        cluster.verseRefMin = oi.verseRefMin;

    cluster.verseRefMax = Math.Max(cluster.verseRefMax, oi.verseRefMax);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Determines which section heads should correlate within the given overlap cluster
/// (a cluster with multiple reference overlaps in one or both books). The cluster's
/// items are cloned first, and the work is done on the clones by a fresh
/// SectionHeadCorrelationHelper.
/// </summary>
/// <param name="overlapCluster">the given overlap cluster</param>
/// <returns>list of Cluster objects representing correlations between section heads</returns>
/// ------------------------------------------------------------------------------------
public static List<Cluster> DetermineSectionHeadCorrelationClusters(Cluster overlapCluster)
{
    // Clone every item of both sides into lists of our own
    List<OverlapInfo> clonedCurr = new List<OverlapInfo>();
    List<OverlapInfo> clonedRev = new List<OverlapInfo>();
    foreach (OverlapInfo revItem in overlapCluster.itemsRev)
        clonedRev.Add(revItem.Clone());
    foreach (OverlapInfo currItem in overlapCluster.itemsCurr)
        clonedCurr.Add(currItem.Clone());

    // Let a helper instance build the section head correlation clusters
    SectionHeadCorrelationHelper helper = new SectionHeadCorrelationHelper();
    helper.DetermineSHCorrelationClusters(clonedCurr, clonedRev);
    return helper.m_clusterList;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// A helper method for section cluster tests. It first checks that the expected items
/// supplied by the calling test are consistent with the expected cluster type, and
/// then verifies the contents of the given Cluster via VerifyCluster().
/// </summary>
/// <param name="cluster">The given cluster.</param>
/// <param name="refMin">The expected verse ref min.</param>
/// <param name="refMax">The expected verse ref max.</param>
/// <param name="type">The expected cluster type.</param>
/// <param name="expectedItemsCurr">The expected items for the Current
/// (see VerifyClusterItems() for details).</param>
/// <param name="expectedItemsRev">The expected items for the Revision
/// (see VerifyClusterItems() for details)</param>
/// <param name="indexToInsertAtInOther">The expected index</param>
/// ------------------------------------------------------------------------------------
private void VerifySectionCluster(Cluster cluster, int refMin, int refMax, ClusterType type,
    object expectedItemsCurr, object expectedItemsRev, int indexToInsertAtInOther)
{
    // Classify what the calling test handed us
    bool currIsSingleItem = expectedItemsCurr is IScrSection || expectedItemsCurr is IScrTxtPara;
    bool revIsSingleItem = expectedItemsRev is IScrSection || expectedItemsRev is IScrTxtPara;

    // Check the calling test code: the expected items must fit the expected type
    switch (type)
    {
        case ClusterType.MatchedItems:
            // one item on each side
            Assert.IsTrue(currIsSingleItem);
            Assert.IsTrue(revIsSingleItem);
            break;

        case ClusterType.MissingInCurrent:
            // nothing in the Current, one item in the Revision
            Assert.IsNull(expectedItemsCurr);
            Assert.IsTrue(revIsSingleItem);
            break;

        case ClusterType.AddedToCurrent:
            // one item in the Current, nothing in the Revision
            Assert.IsTrue(currIsSingleItem);
            Assert.IsNull(expectedItemsRev);
            break;

        case ClusterType.MultipleInBoth:
        case ClusterType.SplitInCurrent:
        case ClusterType.MergedInCurrent:
            // a list of sections on each side
            Assert.IsTrue(expectedItemsCurr is List<IScrSection>);
            Assert.IsTrue(expectedItemsRev is List<IScrSection>);
            break;

        default:
            Assert.Fail("invalid type expected");
            break;
    }

    VerifyCluster(cluster, refMin, refMax, type, expectedItemsCurr, expectedItemsRev,
        indexToInsertAtInOther, ClusterKind.ScrSection);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Moves one Current item and one Revision item out of the given cluster into a new
/// MatchedItems cluster, which is appended to m_clusterList. The extracted slots in
/// the given cluster are set to null (marking them for later deletion).
/// </summary>
/// <param name="cluster">The given cluster.</param>
/// <param name="iItemCurr">The index of the Current item in the cluster.</param>
/// <param name="iItemRev">The index of the Revision item in the cluster.</param>
/// <param name="fFwd"><c>true</c> if this is the forward scan; false if backward scan.</param>
/// ------------------------------------------------------------------------------------
private void ExtractMatchedItemsCluster(Cluster cluster, int iItemCurr, int iItemRev, bool fFwd)
{
    // The new cluster shares these objects with the original cluster (no cloning);
    // the original's references to them are dropped at the end of this method.
    OverlapInfo matchedCurr = cluster.itemsCurr[iItemCurr];
    OverlapInfo matchedRev = cluster.itemsRev[iItemRev];

    // Build the cluster for the matched pair; it inherits the reference range
    Cluster matchedPair = new Cluster();
    matchedPair.clusterType = ClusterType.MatchedItems;
    matchedPair.verseRefMin = cluster.verseRefMin;
    matchedPair.verseRefMax = cluster.verseRefMax;
    matchedPair.itemsCurr.Add(matchedCurr);
    matchedPair.itemsRev.Add(matchedRev);
    m_clusterList.Add(matchedPair);

    // If this extraction takes the last item of one side (leaving orphans on the
    // other side), the original cluster must be told where those orphans belong.
    // This has to be determined before the slots are nulled out below.
    int insertIndexForOrphans;
    bool leavesOrphans = ExtractingTheLastItemOnOneSide(cluster, iItemCurr, iItemRev, fFwd,
        out insertIndexForOrphans);
    if (leavesOrphans)
        cluster.indexToInsertAtInOther = insertIndexForOrphans;

    // Mark the extracted items in the original complex cluster for later deletion
    cluster.itemsCurr[iItemCurr] = null;
    cluster.itemsRev[iItemRev] = null;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Determines whether the given cluster is similar to this cluster, i.e. whether both
/// have the same cluster type and the same index to insert at in the other side.
/// Purpose: Consecutive "similar" Added/Missing clusters may be combined and form a
/// single difference.
/// </summary>
/// <param name="cluster">the given cluster to compare with</param>
/// <returns> <c>true</c> if the given cluster is similar; otherwise, <c>false</c>.
/// </returns>
/// ------------------------------------------------------------------------------------
public bool IsSimilar(Cluster cluster)
{
    return cluster.clusterType == this.clusterType
        && cluster.indexToInsertAtInOther == this.indexToInsertAtInOther; // Dest index
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Extracts correlated pairs of ScrVerses from the given complex ScrVerse cluster.
/// Pairs are tried from the start of the cluster (forward scan) and then from its end
/// (backward scan). A pair is extracted into a matched items cluster when both verses
/// have the same start and end reference and their text correlates at or above the
/// threshold; each scan stops at the first pair that fails.
/// </summary>
/// <param name="cluster">The given ScrVerse complex cluster in the master list. We may
/// reduce the overlapping items in this cluster, but we must not remove it while we
/// iterate through the clusters, lest we mangle our indexing.</param>
/// <param name="scrVersesCurr">The list of Current ScrVerses.</param>
/// <param name="scrVersesRev">The list of Revision ScrVerses.</param>
/// <param name="correlationThreshold">The correlation threshold.</param>
/// ------------------------------------------------------------------------------------
private void ExtractCorrelatedPairsFromScrVerseCluster(Cluster cluster,
    List<ScrVerse> scrVersesCurr, List<ScrVerse> scrVersesRev, double correlationThreshold)
{
    // Forward Scan
    // Compare pairs from the beginning of the cluster until we find Current and
    // Revision strings below the threshold (or references that don't match).
    double correlationFactor;
    int iCorrelatedFwd = -1; // the last index correlated on the forward scan
    ScrVerse verseCurr, verseRev;
    for (int iClstrItem = 0; iClstrItem < cluster.itemsCurr.Count; iClstrItem++)
    {
        if (iClstrItem >= cluster.itemsRev.Count)
            break; // no more Rev items to compare to

        verseCurr = scrVersesCurr[cluster.itemsCurr[iClstrItem].indexInOwner];
        verseRev = scrVersesRev[cluster.itemsRev[iClstrItem].indexInOwner];

        // The references must match before we simplify clusters
        if (verseCurr.StartRef == verseRev.StartRef && verseCurr.EndRef == verseRev.EndRef)
        {
            correlationFactor = DetermineVerseTextCorrelation(verseCurr, verseRev);
            if (correlationFactor >= correlationThreshold)
            {
                // There is enough correlation to create a more-simple cluster here.
                ExtractMatchedItemsCluster(cluster, iClstrItem, iClstrItem, true);
                iCorrelatedFwd = iClstrItem;
            }
            else
            {
                // this correlation attempt failed,
                // so we are finished with the forward scan looking for correlated strings
                break;
            }
        }
        else
            break; // this correlation attempt failed because references don't match
    }

    if (iCorrelatedFwd == cluster.itemsCurr.Count - 1 &&
        iCorrelatedFwd == cluster.itemsRev.Count - 1)
    {
        // entire cluster was correlated on the forward scan; there is no remaining blob.
        return;
    }

    // Backward Scan
    // Compare pairs from the end of the cluster until we find Current and Revision
    // strings below the threshold (or references that don't match).
    for (int iClstrItemCurr = cluster.itemsCurr.Count - 1, iClstrItemRev = cluster.itemsRev.Count - 1;
        iClstrItemCurr >= 0 && iClstrItemRev >= 0;
        iClstrItemCurr--, iClstrItemRev--)
    {
        // Items at or before iCorrelatedFwd were already extracted (and nulled out)
        // by the forward scan, so they must not be touched again.
        if (iClstrItemCurr <= iCorrelatedFwd)
            break; // we've reached the last Curr ScrVerse that was correlated on the forward scan
        if (iClstrItemRev <= iCorrelatedFwd)
            break; // we've reached the last Rev ScrVerse that was correlated on the forward scan

        verseCurr = scrVersesCurr[cluster.itemsCurr[iClstrItemCurr].indexInOwner];
        verseRev = scrVersesRev[cluster.itemsRev[iClstrItemRev].indexInOwner];

        if ((iClstrItemCurr == 0 || iClstrItemRev == 0) &&
            verseCurr.HasVerseNumberRun != verseRev.HasVerseNumberRun)
        {
            // We will not process a pair at iCurr==0 nor iRev==0 when one starts with a
            // verse number and the other does not, to avoid messy comparisons
            break;
        }

        // The references must match before we simplify clusters
        if (verseCurr.StartRef == verseRev.StartRef && verseCurr.EndRef == verseRev.EndRef)
        {
            // Uses the same null-tolerant text access as the forward scan; a verse
            // without text used to cause a NullReferenceException here.
            correlationFactor = DetermineVerseTextCorrelation(verseCurr, verseRev);
            if (correlationFactor >= correlationThreshold)
            {
                // There is enough correlation to create a more-simple cluster here.
                ExtractMatchedItemsCluster(cluster, iClstrItemCurr, iClstrItemRev, false);
            }
            else
                break; // finished backward scan looking for correlated strings
        }
        else
            break; // finished backward scan because references don't match
    }
}

/// ------------------------------------------------------------------------------------
/// <summary>
/// Determines the string correlation between the text of the two given ScrVerses.
/// For a verse whose Text is null, null is passed on as its string.
/// </summary>
/// <param name="verseCurr">The Current ScrVerse.</param>
/// <param name="verseRev">The Revision ScrVerse.</param>
/// <returns>the correlation factor computed by
/// ParagraphCorrelation.DetermineStringCorrelation</returns>
/// ------------------------------------------------------------------------------------
private double DetermineVerseTextCorrelation(ScrVerse verseCurr, ScrVerse verseRev)
{
    return ParagraphCorrelation.DetermineStringCorrelation(
        (verseCurr.Text != null) ? verseCurr.Text.Text : null,
        (verseRev.Text != null) ? verseRev.Text.Text : null,
        m_cache.ServiceLocator.UnicodeCharProps);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Breaks apart a matched items cluster whose first Current item and first Revision
/// item disagree about being a stanza break: each of the two items is moved into a
/// new added/missing cluster of its own.
/// </summary>
/// <param name="cluster">The cluster (expected to be a MatchedItems cluster).</param>
/// ------------------------------------------------------------------------------------
private void ExtractMismatchedParas(Cluster cluster)
{
    Debug.Assert(cluster.clusterType == ClusterType.MatchedItems);

    OverlapInfo currItem = cluster.itemsCurr[0];
    OverlapInfo revItem = cluster.itemsRev[0];

    // Nothing to do when both are stanza breaks, or neither is
    if (currItem.isStanzaBreak == revItem.isStanzaBreak)
        return;

    // New cluster from the current side, then one from the revision side. Each uses
    // the extracted item's own index in its owner as the index to insert at.
    AddMissingAddedCluster(currItem.indexInOwner, 0, cluster, true);
    AddMissingAddedCluster(revItem.indexInOwner, 0, cluster, false);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Extracts consecutive stanza break items from one side of a cluster, moving each
/// into a new missing/added cluster. The scan starts at iFrom and moves toward iTo
/// (which is not itself examined); it stops at the first item that is not a stanza
/// break.
/// </summary>
/// <param name="cluster">The cluster.</param>
/// <param name="iFrom">Beginning limit for extracting items (inclusive).</param>
/// <param name="iTo">Ending limit for extracting items (exclusive).</param>
/// <param name="fIsCurrent">if set to <c>true</c> extract from the current side;
/// <c>false</c> extract from the revision side.</param>
/// <param name="fFwd">if <c>true</c> scanning forward (index increases); otherwise
/// scan backward (index decreases)</param>
/// <returns>number of extracted added or missing empty ScrVerses</returns>
/// ------------------------------------------------------------------------------------
private int ExtractMissingAddedItems(Cluster cluster, int iFrom, int iTo, bool fIsCurrent,
    bool fFwd)
{
    // Every extracted item gets the same index to insert at in the other side:
    // the cluster's own if it has one, otherwise the owner index of the item at iFrom.
    int insertIndex = cluster.indexToInsertAtInOther;
    if (insertIndex == -1)
        insertIndex = cluster.ItemList(fIsCurrent)[iFrom].indexInOwner;
    Debug.Assert(insertIndex != -1);

    int step = fFwd ? 1 : -1;
    int extractedCount = 0;
    for (int iItem = iFrom; fFwd ? iItem < iTo : iItem > iTo; iItem += step)
    {
        // The run of stanza breaks ends at the first content item
        if (!cluster.Item(iItem, fIsCurrent).isStanzaBreak)
            break;

        AddMissingAddedCluster(insertIndex, iItem, cluster, fIsCurrent);
        extractedCount++;
    }
    return extractedCount;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Extracts the empty leading or trailing paragraphs (stanza breaks) from a ScrVerse
/// cluster. Stanza breaks found at the same position on both sides become matched
/// items clusters; a run of stanza breaks on only one side becomes added/missing
/// clusters. The start of the cluster is handled first (forward scan), then the end
/// (backward scan).
/// </summary>
/// <param name="cluster">The cluster.</param>
/// ------------------------------------------------------------------------------------
private void ExtractStanzaBreaksFromScrVerseCluster(Cluster cluster)
{
    // Forward Scan
    // Start comparing at the beginning of cluster until we no longer find empty paragraphs
    // in both the current AND revision.
    int iMatchingEmptyParaFwd = -1; // the last index of matching empty paragraphs on the forward scan
    for (int iClstrItem = 0; iClstrItem < cluster.itemsCurr.Count; iClstrItem++)
    {
        if (iClstrItem >= cluster.itemsRev.Count)
            break; //no more Rev items to compare to

        if (cluster.itemsCurr[iClstrItem].isStanzaBreak &&
            cluster.itemsRev[iClstrItem].isStanzaBreak)
        {
            ExtractMatchedItemsCluster(cluster, iClstrItem, iClstrItem, true);
            iMatchingEmptyParaFwd = iClstrItem;
        }
        else
            break; // finished with forward scan (found content paras)
    }

    // Need to determine if either side has non-matching stanza breaks before content paras.
    if (cluster.IsStanzaBreak(iMatchingEmptyParaFwd + 1, true))
    {
        // Added empty paragraphs before current side. Move into added cluster(s)
        int endIndex = cluster.Items(true);
        ExtractMissingAddedItems(cluster, iMatchingEmptyParaFwd + 1, endIndex, true, true);
    }
    else if (cluster.IsStanzaBreak(iMatchingEmptyParaFwd + 1, false))
    {
        // Added empty paragraphs before revision side. Move into missing cluster(s)
        int endIndex = cluster.Items(false);
        ExtractMissingAddedItems(cluster, iMatchingEmptyParaFwd + 1, endIndex, false, true);
    }

    // Backward Scan
    // Start comparing at the end of cluster until we no longer find empty paragraphs
    // in both the current AND revision. (Index 0 is never examined by this scan.)
    int cCorrelatedBkwrd = 0;
    for (int iClstrItemCurr = cluster.itemsCurr.Count - 1, iClstrItemRev = cluster.itemsRev.Count - 1;
        iClstrItemCurr > 0 && iClstrItemRev > 0;
        iClstrItemCurr--, iClstrItemRev--)
    {
        if (iClstrItemCurr <= iMatchingEmptyParaFwd)
            break; //we've reached the last Curr ScrVerse that was matched on the forward scan
        if (iClstrItemRev <= iMatchingEmptyParaFwd)
            break; //we've reached the last Rev ScrVerse that was correlated on the forward scan

        if (cluster.itemsCurr[iClstrItemCurr].isStanzaBreak &&
            cluster.itemsRev[iClstrItemRev].isStanzaBreak)
        {
            ExtractMatchedItemsCluster(cluster, iClstrItemCurr, iClstrItemRev, false);
            cCorrelatedBkwrd++;
        }
        else
        {
            // Finished with backward scan (found content paras). Stopping here keeps
            // the matched trailing stanza breaks contiguous, which the start indices
            // computed below rely on, and keeps the scan from running into entries
            // that the forward pass above has already nulled out.
            break;
        }
    }

    // Need to determine if either side has non-matching empty paragraphs after content paras.
    // NOTE(review): this arithmetic presumes cluster.Items(...) still counts the slots
    // that were nulled out by the extractions above - confirm against Cluster.Items().
    int iStartScanCurr = cluster.Items(true) - cCorrelatedBkwrd - 1;
    int iStartScanRev = cluster.Items(false) - cCorrelatedBkwrd - 1;
    if (cluster.IsStanzaBreak(iStartScanCurr, true))
    {
        // Added non-matching stanza breaks after current side. Move into new added cluster(s)
        ExtractMissingAddedItems(cluster, iStartScanCurr, iMatchingEmptyParaFwd + 1, true, false);
    }
    else if (cluster.IsStanzaBreak(iStartScanRev, false))
    {
        // Added non-matching stanza breaks after revision side. Move into new missing cluster(s)
        ExtractMissingAddedItems(cluster, iStartScanRev, iMatchingEmptyParaFwd + 1, false, false);
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Extracts the leading and then the trailing stanza break items from the source side
/// of a one-sided ScrVerse cluster. This method does not attempt to find matching
/// empty paragraphs.
/// </summary>
/// <param name="cluster">The cluster, which should be an AddedToCurrent,
/// OrphansInCurrent, MissingInCurrent or OrphansInRevision cluster.</param>
/// ------------------------------------------------------------------------------------
private void ExtractMissingAddedEmptyParasFromScrVerseCluster(Cluster cluster)
{
    Debug.Assert(cluster.clusterType == ClusterType.AddedToCurrent ||
        cluster.clusterType == ClusterType.OrphansInCurrent ||
        cluster.clusterType == ClusterType.MissingInCurrent ||
        cluster.clusterType == ClusterType.OrphansInRevision);

    bool sourceIsCurrent = Cluster.CurrentIsSource(cluster.clusterType);

    // Leading items: scan forward from the first item
    int forwardLimit = cluster.Items(sourceIsCurrent);
    int leadingExtracted = ExtractMissingAddedItems(cluster, 0, forwardLimit, sourceIsCurrent, true);

    // Trailing items: scan backward from the last item, stopping short of the items
    // already extracted from the start. The item count is queried again here, after
    // the forward extraction, exactly as the start index was originally computed.
    int backwardStart = cluster.Items(sourceIsCurrent) - 1;
    ExtractMissingAddedItems(cluster, backwardStart, leadingExtracted, sourceIsCurrent, false);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Simplifies the clusters in m_clusterList that have leading and/or trailing empty
/// paras (stanza breaks):
/// complex clusters have their stanza breaks extracted, one-sided clusters with more
/// than one item have their leading/trailing stanza breaks split off, and matched
/// items clusters that pair a stanza break with a non-stanza break are broken apart.
/// </summary>
/// <param name="scrVersesCurr">The list of Current ScrVerses (not used by this
/// method).</param>
/// ------------------------------------------------------------------------------------
private void SimplifyLeadingTrailingEmptyParas(List<ScrVerse> scrVersesCurr)
{
    // We need a copy of the original list to iterate through, because
    // the master list will likely need to have items added and removed
    Cluster[] clusterListCopy = new Cluster[m_clusterList.Count];
    m_clusterList.CopyTo(clusterListCopy);

    foreach (Cluster cluster in clusterListCopy)
    {
        switch (cluster.clusterType)
        {
            case ClusterType.MultipleInBoth:
            case ClusterType.SplitInCurrent:
            case ClusterType.MergedInCurrent:
                // In the master list, if possible, extract simpler clusters
                ExtractStanzaBreaksFromScrVerseCluster(cluster);
                break;

            case ClusterType.AddedToCurrent:
            case ClusterType.OrphansInCurrent:
            case ClusterType.MissingInCurrent:
            case ClusterType.OrphansInRevision:
            {
                // Pick the side that holds the items the same way the extraction
                // method does. (Testing only for AddedToCurrent looked at the wrong
                // side of OrphansInCurrent clusters, so they were never simplified.)
                bool fIsCurrent = Cluster.CurrentIsSource(cluster.clusterType);
                if (cluster.ItemList(fIsCurrent).Count > 1)
                {
                    // Simplify added/missing clusters that have more than one item,
                    // so that empty paragraphs end up in clusters of their own.
                    ExtractMissingAddedEmptyParasFromScrVerseCluster(cluster);
                }
                break;
            }

            case ClusterType.MatchedItems:
                // If a match was made with a stanza break and a non-stanza break para,
                // then we need to break the cluster apart.
                ExtractMismatchedParas(cluster);
                break;
        }
    }

    CleanUpClusterListForRemovedItems();
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Finds the index to insert at in the other side (i.e. current or revision) for a
/// one-sided cluster, by looking through m_clusterList for the first cluster that has
/// items on the other side and whose last such item does not start after the given
/// cluster.
/// </summary>
/// <param name="cluster">The cluster that needs to know where it should be inserted.</param>
/// <returns>the owner index just past the last other-side item of the first qualifying
/// cluster, or 0 if no cluster qualifies</returns>
/// ------------------------------------------------------------------------------------
private int FindIndexToInsertAtInOther(Cluster cluster)
{
    Debug.Assert((cluster.itemsCurr.Count == 0 && cluster.itemsRev.Count > 0) ||
        (cluster.itemsRev.Count == 0 && cluster.itemsCurr.Count > 0),
        "This should be a one-sided cluster");

    // The "other" side is the current exactly when our cluster has no current items
    bool otherSideIsCurrent = cluster.itemsCurr.Count <= 0;

    foreach (Cluster candidate in m_clusterList)
    {
        int otherSideCount = candidate.Items(otherSideIsCurrent);
        if (otherSideCount <= 0)
            continue; // nothing on the other side, so not a candidate

        OverlapInfo lastOnOtherSide = candidate.Item(otherSideCount - 1, otherSideIsCurrent);
        if (cluster.verseRefMin >= lastOnOtherSide.verseRefMin)
        {
            // We are at or beyond that item's reference: insert right after it
            return lastOnOtherSide.indexInOwner + 1;
        }
    }

    return 0;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Determines whether extracting the given pair takes the last item on one side of
/// the given cluster (thus leaving orphans on the other side).
/// If so, calculates the necessary indexToInsertAtInOther for the cluster.
/// </summary>
/// <param name="cluster">The given cluster.</param>
/// <param name="iItemCurr">The index of the Current item in the cluster.</param>
/// <param name="iItemRev">The index of the Revision item in the cluster.</param>
/// <param name="fFwd"><c>true</c> if this is the forward scan; false if backward scan.</param>
/// <param name="newIndexToInsertAtInOther">Out: The new index to insert at in other,
/// or -1 if none was calculated.</param>
/// <returns>true if a new index to insert at in other (0 or greater) was calculated;
/// otherwise false</returns>
/// ------------------------------------------------------------------------------------
private bool ExtractingTheLastItemOnOneSide(Cluster cluster, int iItemCurr, int iItemRev,
    bool fFwd, out int newIndexToInsertAtInOther)
{
    newIndexToInsertAtInOther = -1;

    // With equally many items on both sides, extracting pairs cannot leave
    // orphans on just one side.
    if (cluster.itemsCurr.Count == cluster.itemsRev.Count)
        return false;

    if (!fFwd)
    {
        // Backward scan: an item is the last remaining one on its side when it is the
        // first item or the item before it has already been nulled out. The insert
        // index is then the item's own index. The Current side is checked first.
        Debug.Assert(iItemCurr >= 0 && iItemRev >= 0);

        bool currIsLastRemaining = iItemCurr == 0 || cluster.itemsCurr[iItemCurr - 1] == null;
        if (currIsLastRemaining)
            newIndexToInsertAtInOther = cluster.itemsCurr[iItemCurr].indexInOwner;
        else if (iItemRev == 0 || cluster.itemsRev[iItemRev - 1] == null)
            newIndexToInsertAtInOther = cluster.itemsRev[iItemRev].indexInOwner;
    }
    else
    {
        // Forward scan: an item is the last one on its side when it sits at the end of
        // its list. The insert index is then just beyond the item's own index.
        // The Current side is checked first.
        int iLastCurr = cluster.itemsCurr.Count - 1;
        int iLastRev = cluster.itemsRev.Count - 1;

        if (iItemCurr == iLastCurr)
            newIndexToInsertAtInOther = cluster.itemsCurr[iItemCurr].indexInOwner + 1;
        else if (iItemRev == iLastRev)
            newIndexToInsertAtInOther = cluster.itemsRev[iItemRev].indexInOwner + 1;
    }

    // report whether an index was found
    return newIndexToInsertAtInOther >= 0;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Creates a shallow copy of this cluster and returns it. The copy gets item lists of
/// its own, but those lists refer to the same OverlapInfo objects as this cluster.
/// </summary>
/// <returns>the new cluster</returns>
/// ------------------------------------------------------------------------------------
public Cluster Clone()
{
    Cluster copy = new Cluster();

    // New lists holding the same item references
    copy.itemsCurr = new List<OverlapInfo>(itemsCurr);
    copy.itemsRev = new List<OverlapInfo>(itemsRev);

    // Plain values are copied over as they are
    copy.clusterType = clusterType;
    copy.verseRefMin = verseRefMin;
    copy.verseRefMax = verseRefMax;
    copy.indexToInsertAtInOther = indexToInsertAtInOther;
    copy.sortKey = sortKey;

    return copy;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Moves one item of the given cluster into a new one-item cluster (AddedToCurrent
/// for a Current item, MissingInCurrent for a Revision item), which is appended to
/// m_clusterList. The item's slot in the given cluster is set to null.
/// </summary>
/// <param name="insertInOtherIndex">Index where added/missing cluster should be inserted in other.</param>
/// <param name="iItem">The index of the item.</param>
/// <param name="cluster">The cluster.</param>
/// <param name="fIsCurrent">if set to <c>true</c> extract from the current side;
/// <c>false</c> extract from the revision side.</param>
/// ------------------------------------------------------------------------------------
private void AddMissingAddedCluster(int insertInOtherIndex, int iItem, Cluster cluster,
    bool fIsCurrent)
{
    Cluster singleItemCluster = new Cluster();
    if (fIsCurrent)
        singleItemCluster.clusterType = ClusterType.AddedToCurrent;
    else
        singleItemCluster.clusterType = ClusterType.MissingInCurrent;

    // The new cluster inherits the reference range of the cluster it came from
    singleItemCluster.verseRefMin = cluster.verseRefMin;
    singleItemCluster.verseRefMax = cluster.verseRefMax;
    singleItemCluster.indexToInsertAtInOther = insertInOtherIndex;

    // The item goes on the same side in the new cluster as in the original
    singleItemCluster.ItemList(fIsCurrent).Add(cluster.Item(iItem, fIsCurrent));
    m_clusterList.Add(singleItemCluster);

    // Mark the item in the original complex cluster for later deletion
    if (!fIsCurrent)
        cluster.itemsRev[iItem] = null;
    else
        cluster.itemsCurr[iItem] = null;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Merges the given cluster's source item into this cluster.
/// This is valid only for similar Missing/Added Cluster types.
/// </summary>
/// <param name="cluster">the given cluster; expected to be similar to this cluster
/// (see IsSimilar) and to have exactly one source item.</param>
/// ------------------------------------------------------------------------------------
public void MergeSourceItems(Cluster cluster)
{
    // Both expectations are checked by debug assertions only
    Debug.Assert(this.IsSimilar(cluster));
    // a single source item is all we accommodate for now
    Debug.Assert(cluster.SourceItems.Count == 1);

    // Do the merge
    OverlapInfo soleSourceItem = (OverlapInfo)cluster.SourceItems[0];
    AddSourceItem(soleSourceItem);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Create basic overlap clusters from the OverlapInfo proxies for the Current and
/// Revision. This method calls FindOverlappedPairs() itself before grouping the
/// proxies, and stores the result in m_clusterList. The clusters will still need
/// their types and insertIndices determined.
/// </summary>
/// ------------------------------------------------------------------------------------
protected void CreateBasicOverlapClusters()
{
	// Establish which proxies overlap which before any grouping is attempted
	FindOverlappedPairs();

	// Collects every cluster we build; becomes the master list at the end
	List<Cluster> foundClusters = new List<Cluster>();

	// Work on throw-away copies of the master proxy lists. Proxies are removed from
	// these as they are consumed, which guarantees the outer loop terminates even
	// when the lists are out-of-order.
	List<OverlapInfo> remainingRev = new List<OverlapInfo>(m_proxyListRev.ToArray());
	List<OverlapInfo> remainingCurr = new List<OverlapInfo>(m_proxyListCurr.ToArray());

	while (remainingRev.Count > 0 || remainingCurr.Count > 0)
	{
		// Pick the proxy that seeds the next cluster
		OverlapInfo seed;
		if (remainingRev.Count == 0)
		{
			// only Current proxies are left
			seed = remainingCurr[0];
		}
		else if (remainingCurr.Count == 0)
		{
			// only Revision proxies are left
			seed = remainingRev[0];
		}
		else
		{
			// Both sides have proxies left: take the one with the earlier start
			// reference (on a tie the Current one is used; either would do).
			OverlapInfo headRev = remainingRev[0];
			OverlapInfo headCurr = remainingCurr[0];
			seed = (headRev.verseRefMin < headCurr.verseRefMin) ? headRev : headCurr;
		}

		// Breadth-first walk over the overlap relationships reachable from the seed.
		// 'pending' holds proxies still to be processed; 'seen' records every proxy
		// that has ever been queued so that none is queued (and clustered) twice.
		Queue<OverlapInfo> pending = new Queue<OverlapInfo>();
		List<OverlapInfo> seen = new List<OverlapInfo>();
		Cluster cluster = new Cluster();

		pending.Enqueue(seed);
		seen.Add(seed);

		while (pending.Count > 0)
		{
			OverlapInfo proxy = pending.Dequeue();

			// This proxy is now used up; take it out of its working list
			if (proxy.bookIsFromRev)
				remainingRev.Remove(proxy);
			else
				remainingCurr.Remove(proxy);

			// Queue every overlapping proxy on the other side that is new to us
			foreach (OverlapInfo neighbor in proxy.overlappedItemsInOther)
			{
				if (seen.Contains(neighbor))
					continue;
				pending.Enqueue(neighbor);
				seen.Add(neighbor);
			}

			// Fold the proxy's range into the cluster's range. A min of 0 is treated
			// as "range not set yet", in which case the proxy's range is taken as is.
			if (cluster.verseRefMin == 0)
			{
				cluster.verseRefMin = proxy.verseRefMin;
				cluster.verseRefMax = proxy.verseRefMax;
			}
			else
			{
				cluster.verseRefMin = Math.Min(cluster.verseRefMin, proxy.verseRefMin);
				cluster.verseRefMax = Math.Max(cluster.verseRefMax, proxy.verseRefMax);
			}

			// File the proxy under the side it came from
			if (proxy.bookIsFromRev)
				cluster.itemsRev.Add(proxy);
			else
				cluster.itemsCurr.Add(proxy);
		}

		foundClusters.Add(cluster);
	}

	// Publish the result
	m_clusterList = foundClusters;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Walks the cluster list and, for each complex ScrVerse cluster that qualifies, asks
/// ExtractCorrelatedPairsFromScrVerseCluster to extract simpler clusters from it.
/// A cluster qualifies when its type is MultipleInBoth, SplitInCurrent or
/// MergedInCurrent, it does not contain a verse bridge difference, and it spans a
/// paragraph break. Finishes by calling CleanUpClusterListForRemovedItems.
/// </summary>
/// <param name="scrVersesCurr">The list of Current ScrVerses.</param>
/// <param name="scrVersesRev">The list of Revision ScrVerses.</param>
/// <param name="correlationThreshold">The correlation threshold.</param>
/// ------------------------------------------------------------------------------------
private void SimplifyComplexScrVerseClusters(List<ScrVerse> scrVersesCurr,
	List<ScrVerse> scrVersesRev, double correlationThreshold)
{
	// Iterate over a snapshot: the master list will likely have items added and
	// removed while the clusters are being simplified.
	Cluster[] snapshot = m_clusterList.ToArray();

	for (int iCluster = 0; iCluster < snapshot.Length; iCluster++)
	{
		Cluster candidate = snapshot[iCluster];

		bool isComplexType =
			candidate.clusterType == ClusterType.MultipleInBoth ||
			candidate.clusterType == ClusterType.SplitInCurrent ||
			candidate.clusterType == ClusterType.MergedInCurrent;
		if (!isComplexType)
			continue;

		// we don't simplify a complex cluster caused by a network of verse bridge
		// overlaps
		if (candidate.ContainsVerseBridgeDifference)
			continue;

		if (!candidate.SpansParaBreak)
			continue;

		// Hand over the entry at the same index in the master list, so that any
		// changes are made there.
		ExtractCorrelatedPairsFromScrVerseCluster(m_clusterList[iCluster],
			scrVersesCurr, scrVersesRev, correlationThreshold);
	}

	CleanUpClusterListForRemovedItems();
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Create basic overlap clusters from the OverlapInfo proxies for the Current and
/// Revision, storing the result in m_clusterList.
/// In this case a proxy only joins the cluster under construction when CanBeIncluded
/// accepts it (e.g. adjacent ScrVerses in the same section); otherwise the cluster
/// under construction is closed and a new one is started.
/// The clusters will still need their types and insert Indices determined.
/// </summary>
/// ------------------------------------------------------------------------------------
private void CreateBasicAdjacentOverlapClusters()
{
	// Collects every completed cluster; becomes the master list at the end
	List<Cluster> finishedClusters = new List<Cluster>();

	// Work on throw-away copies of the master proxy lists. Proxies are removed from
	// these as they are consumed, which guarantees the loop terminates even when the
	// lists are out-of-order.
	List<OverlapInfo> remainingRev = new List<OverlapInfo>(m_proxyListRev.ToArray());
	List<OverlapInfo> remainingCurr = new List<OverlapInfo>(m_proxyListCurr.ToArray());

	// The cluster under construction; null whenever a new one has to be started
	Cluster openCluster = null;

	while (remainingRev.Count > 0 || remainingCurr.Count > 0)
	{
		// The proxy handled in this pass: always the head of one of the two lists
		OverlapInfo nextProxy;

		if (remainingRev.Count == 0)
		{
			// only Current proxies are left
			nextProxy = remainingCurr[0];
		}
		else if (remainingCurr.Count == 0)
		{
			// only Revision proxies are left
			nextProxy = remainingRev[0];
		}
		else
		{
			OverlapInfo headRev = remainingRev[0];
			OverlapInfo headCurr = remainingCurr[0];

			if (openCluster != null)
			{
				// Prefer whichever head can join the open cluster, Revision first
				if (CanBeIncluded(openCluster, headRev))
				{
					nextProxy = headRev;
				}
				else if (CanBeIncluded(openCluster, headCurr))
				{
					nextProxy = headCurr;
				}
				else
				{
					// Neither head fits: close the open cluster and go around again
					// to choose the starting proxy for a new one.
					finishedClusters.Add(openCluster);
					openCluster = null;
					continue;
				}
			}
			else
			{
				// Starting a new cluster
				bool revIsStanzaBreak = headRev.isStanzaBreak;
				bool currIsStanzaBreak = headCurr.isStanzaBreak;

				if (revIsStanzaBreak != currIsStanzaBreak)
				{
					// exactly one of the heads is a stanza break; take that one
					nextProxy = revIsStanzaBreak ? headRev : headCurr;
				}
				else
				{
					// take the one with the earlier start reference (on a tie the
					// Current one is used; either would do)
					nextProxy = (headRev.verseRefMin < headCurr.verseRefMin) ?
						headRev : headCurr;
				}
			}
		}

		// If a cluster is open but this proxy cannot join it, close that cluster
		if (openCluster != null && !CanBeIncluded(openCluster, nextProxy))
		{
			finishedClusters.Add(openCluster);
			openCluster = null;
		}

		// Start a fresh cluster when none is open, then add the proxy
		if (openCluster == null)
			openCluster = new Cluster();
		AddItemToCluster(openCluster, nextProxy);

		// The proxy is used up; take it out of its working list
		if (nextProxy.bookIsFromRev)
			remainingRev.Remove(nextProxy);
		else
			remainingCurr.Remove(nextProxy);
	}

	// Don't lose the cluster that was still open when the proxies ran out
	if (openCluster != null)
	{
		Debug.Assert(openCluster.itemsCurr.Count > 0 || openCluster.itemsRev.Count > 0);
		finishedClusters.Add(openCluster);
	}

	// Publish the result
	m_clusterList = finishedClusters;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Test helper that asserts the expected characteristics of any kind of cluster:
/// its reference range, type, insert index and the items on both sides.
/// </summary>
/// <param name="cluster">The cluster to check.</param>
/// <param name="refMin">The expected minimum reference.</param>
/// <param name="refMax">The expected maximum reference.</param>
/// <param name="type">The expected cluster type.</param>
/// <param name="expectedItemsCurr">The expected items in the Current.</param>
/// <param name="expectedItemsRev">The expected items in the Revision.</param>
/// <param name="indexToInsertAtInOther">The expected index to insert at in other.</param>
/// <param name="kindOfCluster">The kind of cluster; selects which item verifier is
/// used. For kinds other than ScrSection and ScrVerse the items are not checked.</param>
/// ------------------------------------------------------------------------------------
private void VerifyCluster(Cluster cluster, int refMin, int refMax, ClusterType type,
	object expectedItemsCurr, object expectedItemsRev, int indexToInsertAtInOther,
	ClusterKind kindOfCluster)
{
	// Reference range and classification
	Assert.AreEqual(refMin, cluster.verseRefMin);
	Assert.AreEqual(refMax, cluster.verseRefMax);
	Assert.AreEqual(type, cluster.clusterType);

	// Insert position in the other side
	Assert.AreEqual(indexToInsertAtInOther, cluster.indexToInsertAtInOther);

	// Items on each side, Current first, using the verifier for this kind of cluster
	if (kindOfCluster == ClusterKind.ScrSection)
	{
		VerifySectionClusterItems(expectedItemsCurr, cluster.itemsCurr, kindOfCluster);
		VerifySectionClusterItems(expectedItemsRev, cluster.itemsRev, kindOfCluster);
	}
	else if (kindOfCluster == ClusterKind.ScrVerse)
	{
		VerifyScrVerseClusterItems(expectedItemsCurr, cluster.itemsCurr, kindOfCluster);
		VerifyScrVerseClusterItems(expectedItemsRev, cluster.itemsRev, kindOfCluster);
	}
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Determines whether a given overlap info can be included in the given cluster.
/// To be included in the cluster, it must meet the following criteria:
/// * its reference range overlaps the reference range of the cluster, and
/// * if the cluster already holds items from the same side (Current or Revision),
///   it has the same myParaOwner as the last of those items (e.g. same StText).
/// </summary>
/// <param name="cluster">The cluster.</param>
/// <param name="oi">The overlap info.</param>
/// <returns><c>true</c> if the specified OverlapInfo can be included in the cluster</returns>
/// ------------------------------------------------------------------------------------
private bool CanBeIncluded(Cluster cluster, OverlapInfo oi)
{
	// No overlap if oi ends before the cluster starts, or starts after it ends
	if (oi.verseRefMax < cluster.verseRefMin || cluster.verseRefMax < oi.verseRefMin)
		return false;

	// The ranges overlap. Now compare owners against the items already collected
	// for the side that oi belongs to.
	IList<OverlapInfo> sameSideItems = oi.bookIsFromRev ? cluster.itemsRev : cluster.itemsCurr;
	if (sameSideItems.Count == 0)
		return true; // nothing on this side yet, so there is no owner to conflict with

	OverlapInfo lastOnSameSide = sameSideItems[sameSideItems.Count - 1];
	if (oi.myParaOwner != lastOnSameSide.myParaOwner)
		return false; // owned by something other than the last item's owner

	return true;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// A helper method for section cluster tests-
/// Verifies the contents of the given Cluster without requiring the caller to supply
/// an indexToInsertAtInOther (-1 is passed on instead). That index is only needed for
/// Missing/Added clusters, so this overload fails for a cluster of either type.
/// </summary>
/// <param name="cluster">The cluster to check.</param>
/// <param name="refMin">The expected minimum reference.</param>
/// <param name="refMax">The expected maximum reference.</param>
/// <param name="type">The expected cluster type.</param>
/// <param name="expectedItemsCurr">The expected items in the Current.</param>
/// <param name="expectedItemsRev">The expected items in the Revision.</param>
/// ------------------------------------------------------------------------------------
private void VerifySectionCluster(Cluster cluster, int refMin, int refMax, ClusterType type,
	object expectedItemsCurr, object expectedItemsRev)
{
	// Guard against misuse: the check looks at the cluster's actual type
	bool isMissingOrAdded =
		cluster.clusterType == ClusterType.MissingInCurrent ||
		cluster.clusterType == ClusterType.AddedToCurrent;
	Assert.IsTrue(!isMissingOrAdded,
		"Missing/Added clusters must be verified by passing in the indexToInsertAtInOther parameter.");

	// Delegate to the full overload
	VerifySectionCluster(cluster, refMin, refMax, type, expectedItemsCurr,
		expectedItemsRev, -1);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Adds the given overlap info to the Current or Revision item list of the cluster,
/// according to its myBook value, and widens the cluster's reference range to take in
/// the item's range. An item whose myBook is neither kCurrent nor kRevision is not
/// added to either list, but the range is still updated.
/// </summary>
/// <param name="cluster">The cluster to add to.</param>
/// <param name="oi">The overlap info to add.</param>
/// ------------------------------------------------------------------------------------
private void AddItemToCluster(Cluster cluster, OverlapInfo oi)
{
	// File the item under the side it belongs to
	if (oi.myBook == OverlapInfo.kCurrent)
	{
		cluster.itemsCurr.Add(oi);
	}
	else if (oi.myBook == OverlapInfo.kRevision)
	{
		cluster.itemsRev.Add(oi);
	}

	// Widen the range start. A start of 0 is treated as "not set yet", so the item's
	// start is taken unconditionally in that case.
	bool startNotSetYet = cluster.verseRefMin == 0;
	if (startNotSetYet || oi.verseRefMin < cluster.verseRefMin)
	{
		cluster.verseRefMin = oi.verseRefMin;
	}

	// Widen the range end
	if (cluster.verseRefMax < oi.verseRefMax)
	{
		cluster.verseRefMax = oi.verseRefMax;
	}
}