Exemplo n.º 1
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Builds a cluster list of correlation clusters, based upon the possible correlations
		/// previously established.  Since one section head must correlate to at most one other
		/// section head, even though a number of possible correlations exist, priority is given
		/// to those that come first in the file over those that come later.
		/// </summary>
		/// ------------------------------------------------------------------------------------
		private void DetermineCorrelatedSectionHeadClusters()
		{
			// Clusters are accumulated locally and published to m_clusterList at the end.
			List<Cluster> correlated = new List<Cluster>();

			// Scratch copies of the two proxy lists. Entries are removed as soon as they
			// are assigned to a cluster, so the member lists themselves are left untouched.
			List<OverlapInfo> pendingRev = new List<OverlapInfo>(m_proxyListRev.ToArray());
			List<OverlapInfo> pendingCurr = new List<OverlapInfo>(m_proxyListCurr.ToArray());

			// Each pass removes at least the head of one pending list, so the loop ends.
			while (pendingRev.Count > 0 || pendingCurr.Count > 0)
			{
				// The pair chosen on this pass; one side stays null when no partner exists.
				OverlapInfo pickedRev = null;
				OverlapInfo pickedCurr = null;

				if (pendingRev.Count == 0)
				{
					// Only Current proxies are left: emit them one by one, unpaired.
					pickedCurr = pendingCurr[0];
				}
				else if (pendingCurr.Count == 0)
				{
					// Only Revision proxies are left: emit them one by one, unpaired.
					pickedRev = pendingRev[0];
				}
				else
				{
					// Both sides still have proxies. The head with the earlier starting
					// reference leads (on a tie the Current head leads), and its partner
					// is the first of its overlapping items that is still pending.
					OverlapInfo headRev = pendingRev[0];
					OverlapInfo headCurr = pendingCurr[0];

					if (headRev.verseRefMin < headCurr.verseRefMin)
					{
						pickedRev = headRev;
						foreach (OverlapInfo candidate in headRev.overlappedItemsInOther)
						{
							if (pendingCurr.Contains(candidate))
							{
								pickedCurr = candidate;
								break;
							}
						}
					}
					else
					{
						pickedCurr = headCurr;
						foreach (OverlapInfo candidate in headCurr.overlappedItemsInOther)
						{
							if (pendingRev.Contains(candidate))
							{
								pickedRev = candidate;
								break;
							}
						}
					}
				}

				// Move the chosen proxies out of the pending lists and into a new cluster.
				Cluster pairing = new Cluster();
				if (pickedRev != null)
				{
					pairing.itemsRev.Add(pickedRev);
					pendingRev.Remove(pickedRev);
				}
				if (pickedCurr != null)
				{
					pairing.itemsCurr.Add(pickedCurr);
					pendingCurr.Remove(pickedCurr);
				}

				// The cluster's reference range spans whichever proxies it holds.
				if (pickedRev != null && pickedCurr != null)
				{
					pairing.verseRefMin = Math.Min(pickedRev.verseRefMin, pickedCurr.verseRefMin);
					pairing.verseRefMax = Math.Max(pickedRev.verseRefMax, pickedCurr.verseRefMax);
				}
				else if (pickedRev != null)
				{
					pairing.verseRefMin = pickedRev.verseRefMin;
					pairing.verseRefMax = pickedRev.verseRefMax;
				}
				else if (pickedCurr != null)
				{
					pairing.verseRefMin = pickedCurr.verseRefMin;
					pairing.verseRefMax = pickedCurr.verseRefMax;
				}

				correlated.Add(pairing);
			}

			// Publish the finished list.
			m_clusterList = correlated;
		}
Exemplo n.º 2
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Adds the item to cluster.
		/// </summary>
		/// <param name="cluster">The cluster.</param>
		/// <param name="oi">The overlap info.</param>
		/// ------------------------------------------------------------------------------------
		private void AddItemToCluster(Cluster cluster, OverlapInfo oi)
		{
			// File the item on the side of the cluster matching the book it came from.
			if (!oi.bookIsFromRev)
			{
				cluster.itemsCurr.Add(oi);
			}
			else
			{
				cluster.itemsRev.Add(oi);
			}

			// Widen the cluster's reference range to include the new item.
			// A minimum of 0 is treated as "not set yet" and is always replaced.
			bool fMinNotSet = (cluster.verseRefMin == 0);
			if (fMinNotSet || oi.verseRefMin < cluster.verseRefMin)
			{
				cluster.verseRefMin = oi.verseRefMin;
			}

			if (cluster.verseRefMax < oi.verseRefMax)
			{
				cluster.verseRefMax = oi.verseRefMax;
			}
		}
Exemplo n.º 3
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// This static method steps through all the items in the given overlapCluster (which
		/// has multiple reference overlaps in one or both books)
		/// and determines which section heads should correlate.
		/// </summary>
		/// <param name="overlapCluster">the given overlap cluster</param>
		/// <returns>list of Cluster objects representing correlations between section heads</returns>
		/// ------------------------------------------------------------------------------------
		public static List<Cluster> DetermineSectionHeadCorrelationClusters(Cluster overlapCluster)
		{
			// Work on clones of the cluster's items, so the helper operates on copies
			// rather than on the OverlapInfo objects held by overlapCluster.
			List<OverlapInfo> revClones = new List<OverlapInfo>();
			foreach (OverlapInfo revItem in overlapCluster.itemsRev)
				revClones.Add(revItem.Clone());

			List<OverlapInfo> currClones = new List<OverlapInfo>();
			foreach (OverlapInfo currItem in overlapCluster.itemsCurr)
				currClones.Add(currItem.Clone());

			// Let a fresh helper instance build the section head correlation clusters
			// and return the list it produced.
			SectionHeadCorrelationHelper helper = new SectionHeadCorrelationHelper();
			helper.DetermineSHCorrelationClusters(currClones, revClones);
			return helper.m_clusterList;
		}
Exemplo n.º 4
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// A helper method for section cluster tests-
		/// Verifies the contents of the given Cluster.
		/// </summary>
		/// <param name="cluster">The given cluster.</param>
		/// <param name="refMin">The expected verse ref min.</param>
		/// <param name="refMax">The expected verse ref max.</param>
		/// <param name="type">The expected cluster type.</param>
		/// <param name="expectedItemsCurr">The expected items for the Current
		/// (see VerifyClusterItems() for details).</param>
		/// <param name="expectedItemsRev">The expected items for the Revision
		/// (see VerifyClusterItems() for details)</param>
		/// <param name="indexToInsertAtInOther">The expected index</param>
		/// ------------------------------------------------------------------------------------
		private void VerifySectionCluster(Cluster cluster, int refMin, int refMax, ClusterType type,
			object expectedItemsCurr, object expectedItemsRev, int indexToInsertAtInOther)
		{
			// First validate the calling test itself: the shape of the expected items
			// has to agree with the cluster type the test says it expects.
			bool fCurrIsSingleItem = expectedItemsCurr is IScrSection || expectedItemsCurr is IScrTxtPara;
			bool fRevIsSingleItem = expectedItemsRev is IScrSection || expectedItemsRev is IScrTxtPara;

			switch (type)
			{
				case ClusterType.MatchedItems:
					// one item expected on each side
					Assert.IsTrue(fCurrIsSingleItem);
					Assert.IsTrue(fRevIsSingleItem);
					break;

				case ClusterType.MissingInCurrent:
					// nothing in the Current, one item in the Revision
					Assert.IsNull(expectedItemsCurr);
					Assert.IsTrue(fRevIsSingleItem);
					break;

				case ClusterType.AddedToCurrent:
					// one item in the Current, nothing in the Revision
					Assert.IsTrue(fCurrIsSingleItem);
					Assert.IsNull(expectedItemsRev);
					break;

				case ClusterType.MultipleInBoth:
				case ClusterType.SplitInCurrent:
				case ClusterType.MergedInCurrent:
					// complex clusters take a list of sections on both sides
					Assert.IsTrue(expectedItemsCurr is List<IScrSection>);
					Assert.IsTrue(expectedItemsRev is List<IScrSection>);
					break;

				default:
					Assert.Fail("invalid type expected");
					break;
			}

			// The expectations are coherent; now verify the cluster as a section cluster.
			VerifyCluster(cluster, refMin, refMax, type, expectedItemsCurr, expectedItemsRev,
				indexToInsertAtInOther, ClusterKind.ScrSection);
		}
Exemplo n.º 5
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Extracts the matched items from the given cluster.
		/// </summary>
		/// <param name="cluster">The given cluster.</param>
		/// <param name="iItemCurr">The index of the Current item in the cluster.</param>
		/// <param name="iItemRev">The index of the Revision item in the cluster.</param>
		/// <param name="fFwd"><c>true</c> if this is the forward scan; false if backward scan.</param>
		/// ------------------------------------------------------------------------------------
		private void ExtractMatchedItemsCluster(Cluster cluster, int iItemCurr, int iItemRev,
			bool fFwd)
		{
			// The matched pair gets a cluster of its own, which inherits the reference
			// range of the cluster it is taken from.
			Cluster matchedPair = new Cluster();
			matchedPair.clusterType = ClusterType.MatchedItems;
			matchedPair.verseRefMax = cluster.verseRefMax;
			matchedPair.verseRefMin = cluster.verseRefMin;
			// The same OverlapInfo objects are shared (no clone): the slots in the
			// original cluster are nulled out at the end of this method.
			matchedPair.itemsRev.Add(cluster.itemsRev[iItemRev]);
			matchedPair.itemsCurr.Add(cluster.itemsCurr[iItemCurr]);
			m_clusterList.Add(matchedPair);

			// If this extraction takes the last item on one side of the original cluster,
			// the other side is left with orphans, and the original cluster then needs an
			// indexToInsertAtInOther. This has to be worked out while the items are still
			// in place, i.e. before the slots are nulled below.
			int indexForOrphans;
			bool fLeavesOrphans = ExtractingTheLastItemOnOneSide(cluster, iItemCurr, iItemRev, fFwd,
				out indexForOrphans);
			if (fLeavesOrphans)
			{
				cluster.indexToInsertAtInOther = indexForOrphans;
			}

			// Null the extracted slots; they mark the items for later deletion.
			cluster.itemsRev[iItemRev] = null;
			cluster.itemsCurr[iItemCurr] = null;
		}
Exemplo n.º 6
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Determines whether the given cluster is similar to this cluster.
		/// Purpose: Consecutive "similar" Added/Missing clusters may be combined and form a
		/// single difference.
		/// </summary>
		/// <param name="cluster">the given cluster to compare with</param>
		/// <returns> <c>true</c> if the given cluster is similar; otherwise, <c>false</c>.
		/// </returns>
		/// ------------------------------------------------------------------------------------
		public bool IsSimilar(Cluster cluster)
		{
			// Two clusters are similar when they have the same type and the same
			// destination index (indexToInsertAtInOther) on the other side.
			bool fSameType = (this.clusterType == cluster.clusterType);
			bool fSameDestIndex = (this.indexToInsertAtInOther == cluster.indexToInsertAtInOther);
			return fSameType && fSameDestIndex;
		}
Exemplo n.º 7
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Extracts correlated pairs of ScrVerses from the given complex ScrVerse cluster.
		/// </summary>
		/// <param name="cluster">The given ScrVerse complex cluster in the master list. We may
		/// reduce the overlapping items in this cluster, but we must not remove it while we
		/// iterate through the clusters, lest we mangle our indexing.</param>
		/// <param name="scrVersesCurr">The list of Current ScrVerses.</param>
		/// <param name="scrVersesRev">The list of Revision ScrVerses.</param>
		/// <param name="correlationThreshold">The correlation threshold.</param>
		/// ------------------------------------------------------------------------------------
		private void ExtractCorrelatedPairsFromScrVerseCluster(Cluster cluster, List<ScrVerse> scrVersesCurr,
			List<ScrVerse> scrVersesRev, double correlationThreshold)
		{
			// Attempt correlation of ScrVerse pairs from the start and end of the cluster.
			// Every pair whose references match and whose text correlation reaches the
			// threshold is moved into its own matched-items cluster (the slots in the
			// complex cluster are nulled by ExtractMatchedItemsCluster).

			// Forward Scan
			// Compare pairs at the same index from the beginning of the cluster, stopping
			// at the first pair that fails to correlate.
			double correlationFactor;
			int iCorrelatedFwd = -1; // the last index correlated on the forward scan
			ScrVerse verseCurr, verseRev;
			for (int iClstrItem = 0; iClstrItem < cluster.itemsCurr.Count; iClstrItem++)
			{
				if (iClstrItem >= cluster.itemsRev.Count)
					break; // no more Rev items to compare to

				verseCurr = scrVersesCurr[cluster.itemsCurr[iClstrItem].indexInOwner];
				verseRev = scrVersesRev[cluster.itemsRev[iClstrItem].indexInOwner];

				// The references must match before we simplify clusters
				if (verseCurr.StartRef != verseRev.StartRef || verseCurr.EndRef != verseRev.EndRef)
					break; // this correlation attempt failed because references don't match

				correlationFactor = ParagraphCorrelation.DetermineStringCorrelation(
					(verseCurr.Text != null) ? verseCurr.Text.Text : null,
					(verseRev.Text != null) ? verseRev.Text.Text : null,
					m_cache.ServiceLocator.UnicodeCharProps);
				if (correlationFactor < correlationThreshold)
					break; // not enough correlation; the forward scan is finished

				// There is enough correlation to create a more-simple cluster here.
				ExtractMatchedItemsCluster(cluster, iClstrItem, iClstrItem, true);
				iCorrelatedFwd = iClstrItem;
			}

			if (iCorrelatedFwd == cluster.itemsCurr.Count - 1 && iCorrelatedFwd == cluster.itemsRev.Count - 1)
			{
				// entire cluster was correlated on the forward scan; there is no remaining blob.
				return;
			}

			// Backward Scan
			// Compare pairs from the end of each side of the cluster, walking toward the
			// start, and stop at the first pair that fails to correlate. The scan never
			// goes back into the slots the forward scan already extracted (those are null).
			for (int iClstrItemCurr = cluster.itemsCurr.Count - 1, iClstrItemRev = cluster.itemsRev.Count - 1;
				iClstrItemCurr >= 0 && iClstrItemRev >= 0;
				iClstrItemCurr--, iClstrItemRev--)
			{
				if (iClstrItemCurr <= iCorrelatedFwd)
					break; // we've reached the last Curr ScrVerse that was correlated on the forward scan
				if (iClstrItemRev <= iCorrelatedFwd)
					break; // we've reached the last Rev ScrVerse that was correlated on the forward scan

				verseCurr = scrVersesCurr[cluster.itemsCurr[iClstrItemCurr].indexInOwner];
				verseRev = scrVersesRev[cluster.itemsRev[iClstrItemRev].indexInOwner];

				if ((iClstrItemCurr == 0 || iClstrItemRev == 0) &&
					verseCurr.HasVerseNumberRun != verseRev.HasVerseNumberRun)
				{
					// We will not process a pair at iCurr == 0 nor iRev == 0 when one starts
					// with a verse number and the other does not, to avoid messy comparisons
					// at the verse number (which may cause an extra or missing verse number
					// during revert).
					break;
				}

				// The references must match before we simplify clusters
				if (verseCurr.StartRef != verseRev.StartRef || verseCurr.EndRef != verseRev.EndRef)
					break; // finished backward scan because references don't match

				// Guard against a null Text exactly as the forward scan does, so a verse
				// without text cannot raise a NullReferenceException here.
				correlationFactor = ParagraphCorrelation.DetermineStringCorrelation(
					(verseCurr.Text != null) ? verseCurr.Text.Text : null,
					(verseRev.Text != null) ? verseRev.Text.Text : null,
					m_cache.ServiceLocator.UnicodeCharProps);
				if (correlationFactor < correlationThreshold)
					break; // finished backward scan looking for correlated strings

				// There is enough correlation to create a more-simple cluster here.
				ExtractMatchedItemsCluster(cluster, iClstrItemCurr, iClstrItemRev, false);
			}
		}
Exemplo n.º 8
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Extracts paras that are mismatched in terms of being stanza breaks.
		/// </summary>
		/// <param name="cluster">The cluster.</param>
		/// ------------------------------------------------------------------------------------
		private void ExtractMismatchedParas(Cluster cluster)
		{
			Debug.Assert(cluster.clusterType == ClusterType.MatchedItems);

			OverlapInfo firstCurr = cluster.itemsCurr[0];
			OverlapInfo firstRev = cluster.itemsRev[0];

			// Nothing to do when both sides agree on being (or not being) a stanza break.
			if (firstCurr.isStanzaBreak == firstRev.isStanzaBreak)
				return;

			// One side is a stanza break and the other is not, so the pair is split into
			// two one-sided clusters: first one for the Current item ...
			AddMissingAddedCluster(firstCurr.indexInOwner, 0, cluster, true);

			// ... then one for the Revision item.
			AddMissingAddedCluster(firstRev.indexInOwner, 0, cluster, false);
		}
Exemplo n.º 9
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Extracts the missing or added items from a cluster.
		/// </summary>
		/// <param name="cluster">The cluster.</param>
		/// <param name="iFrom">Beginning limit for extracting items.</param>
		/// <param name="iTo">Ending limit for extracting items.</param>
		/// <param name="fIsCurrent">if set to <c>true</c> extract from the current side;
		/// <c>false</c> extract from the revision side.</param>
		/// <param name="fFwd">if <c>true</c> scanning forward; otherwise scan backward</param>
		/// <returns>number of extracted added or missing empty ScrVerses</returns>
		/// ------------------------------------------------------------------------------------
		private int ExtractMissingAddedItems(Cluster cluster, int iFrom, int iTo, bool fIsCurrent,
			bool fFwd)
		{
			// Every extracted cluster gets the same insertion index: the one already set
			// on the cluster or, when none is set (-1), the owner index of the item at iFrom.
			int insertIndex = cluster.indexToInsertAtInOther;
			if (insertIndex == -1)
				insertIndex = cluster.ItemList(fIsCurrent)[iFrom].indexInOwner;

			Debug.Assert(insertIndex != -1);

			// Walk from iFrom toward iTo (iTo itself is excluded), upward on the forward
			// scan and downward on the backward scan, moving each stanza break into its
			// own added/missing cluster. The walk stops at the first item that is not a
			// stanza break.
			int step = fFwd ? 1 : -1;
			int cExtracted = 0;
			for (int iItem = iFrom; fFwd ? iItem < iTo : iItem > iTo; iItem += step)
			{
				if (!cluster.Item(iItem, fIsCurrent).isStanzaBreak)
					break;

				AddMissingAddedCluster(insertIndex, iItem, cluster, fIsCurrent);
				cExtracted++;
			}

			return cExtracted;
		}
Exemplo n.º 10
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Extracts the empty leading or trailing paragraphs from a ScrVerse cluster.
		/// </summary>
		/// <param name="cluster">The cluster.</param>
		/// ------------------------------------------------------------------------------------
		private void ExtractStanzaBreaksFromScrVerseCluster(Cluster cluster)
		{
			// Overview: (1) a forward scan extracts leading pairs that are stanza breaks on
			// both sides; (2) leading stanza breaks present on one side only are moved into
			// added/missing clusters; (3) and (4) do the same from the end of the cluster.
			// Extracted items are not removed here: ExtractMatchedItemsCluster and
			// AddMissingAddedCluster null the slot in the cluster's item list.

			// Forward Scan
			// Start comparing at the beginning of cluster until we no longer find empty paragraphs
			// in both the current AND revision.
			int iMatchingEmptyParaFwd = -1; // the last index of matching empty paragraphs on the forward scan
			for (int iClstrItem = 0; iClstrItem < cluster.itemsCurr.Count; iClstrItem++)
			{
				if (iClstrItem >= cluster.itemsRev.Count)
					break; //no more Rev items to compare to

				if (cluster.itemsCurr[iClstrItem].isStanzaBreak && cluster.itemsRev[iClstrItem].isStanzaBreak)
				{
					ExtractMatchedItemsCluster(cluster, iClstrItem, iClstrItem, true);
					iMatchingEmptyParaFwd = iClstrItem;
				}
				else
					break; // finished with forward scan (found content paras)
			}

			// Need to determine if either side has non-matching stanza breaks before content paras.
			// NOTE(review): Cluster.IsStanzaBreak(index, isCurrent) and Cluster.Items(isCurrent)
			// are defined elsewhere; this code presumes IsStanzaBreak tolerates an index past
			// the end (or a nulled slot) and that Items returns the item count of that side
			// -- confirm.
			if (cluster.IsStanzaBreak(iMatchingEmptyParaFwd + 1, true))
			{
				// Added empty paragraphs before current side. Move into added cluster(s)
				int endIndex = cluster.Items(true);
				ExtractMissingAddedItems(cluster, iMatchingEmptyParaFwd + 1, endIndex, true, true);
			}
			else if (cluster.IsStanzaBreak(iMatchingEmptyParaFwd + 1, false))
			{
				// Added empty paragraphs before revision side. Move into missing cluster(s)
				int endIndex = cluster.Items(false);
				ExtractMissingAddedItems(cluster, iMatchingEmptyParaFwd + 1, endIndex, false, true);
			}

			// Backward Scan
			// Start comparing at the end of cluster until we no longer find empty paragraphs
			// in both the current AND revision.
			// The loop condition uses "> 0", so index 0 on either side is never examined here.
			// NOTE(review): unlike the forward scan, a pair that is not a stanza break on both
			// sides does not end this loop; it keeps walking backward and may extract matches
			// that are not contiguous with the end of the cluster. The start indexes computed
			// from cCorrelatedBkwrd below look as if they assume contiguous extraction --
			// confirm whether an "else break" is missing.
			int cCorrelatedBkwrd = 0;
			for (int iClstrItemCurr = cluster.itemsCurr.Count - 1, iClstrItemRev = cluster.itemsRev.Count - 1;
				 iClstrItemCurr > 0 && iClstrItemRev > 0;
				 iClstrItemCurr--, iClstrItemRev--)
			{
				if (iClstrItemCurr <= iMatchingEmptyParaFwd)
					break; //we've reached the last Curr ScrVerse that was matched on the forward scan
				if (iClstrItemRev <= iMatchingEmptyParaFwd)
					break; //we've reached the last Rev ScrVerse that was correlated on the forward scan

				if (cluster.itemsCurr[iClstrItemCurr].isStanzaBreak && cluster.itemsRev[iClstrItemRev].isStanzaBreak)
				{
					ExtractMatchedItemsCluster(cluster, iClstrItemCurr, iClstrItemRev, false);
					cCorrelatedBkwrd++;
				}
			}

			// Need to determine if either side has non-matching empty paragraphs after content paras.
			// Start just before the pairs matched on the backward scan and walk toward the
			// start, stopping short of index iMatchingEmptyParaFwd + 1 (the exclusive limit).
			int iStartScanCurr = cluster.Items(true) - cCorrelatedBkwrd - 1;
			int iStartScanRev = cluster.Items(false) - cCorrelatedBkwrd - 1;
			if (cluster.IsStanzaBreak(iStartScanCurr, true))
			{
				// Added non-matching stanza breaks after current side. Move into new added cluster(s)
				ExtractMissingAddedItems(cluster, iStartScanCurr, iMatchingEmptyParaFwd + 1, true, false);
			}
			else if (cluster.IsStanzaBreak(iStartScanRev, false))
			{
				// Added non-matching stanza breaks after revision side. Move into new missing cluster(s)
				ExtractMissingAddedItems(cluster, iStartScanRev, iMatchingEmptyParaFwd + 1, false, false);
			}
		}
Exemplo n.º 11
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Extracts the missing or added empty paras from a ScrVerse cluster. This method does
		/// not attempt to find matching empty paragraphs.
		/// </summary>
		/// <param name="cluster">The cluster which should be an AddedToCurrent or
		/// MissingInCurrent cluster.</param>
		/// ------------------------------------------------------------------------------------
		private void ExtractMissingAddedEmptyParasFromScrVerseCluster(Cluster cluster)
		{
			// Only one-sided cluster types are expected here.
			Debug.Assert(cluster.clusterType == ClusterType.AddedToCurrent ||
				cluster.clusterType == ClusterType.OrphansInCurrent ||
				cluster.clusterType == ClusterType.MissingInCurrent ||
				cluster.clusterType == ClusterType.OrphansInRevision);

			// Which side of the cluster the items are taken from.
			bool fSourceIsCurrent = Cluster.CurrentIsSource(cluster.clusterType);

			// Leading stanza breaks: scan forward from index 0 up to the item count.
			int cItems = cluster.Items(fSourceIsCurrent);
			int cTakenFromStart = ExtractMissingAddedItems(cluster, 0, cItems, fSourceIsCurrent, true);

			// Trailing stanza breaks: scan backward from the last index. The number of
			// items taken from the start is the exclusive lower limit of this scan.
			// (The item count is asked for again rather than reused from above.)
			int iLast = cluster.Items(fSourceIsCurrent) - 1;
			ExtractMissingAddedItems(cluster, iLast, cTakenFromStart, fSourceIsCurrent, false);
		}
Exemplo n.º 12
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Simplifies the clusters with leading and/or trailing empty paras.
		/// </summary>
		/// ------------------------------------------------------------------------------------
		private void SimplifyLeadingTrailingEmptyParas(List<ScrVerse> scrVersesCurr)
		{
			// Iterate over a snapshot of the master list: the extraction helpers called
			// below append new clusters to m_clusterList while we are walking it.
			// (scrVersesCurr is not used by this method; it is kept for the callers.)
			Cluster[] clusterListCopy = m_clusterList.ToArray();

			foreach (Cluster cluster in clusterListCopy)
			{
				switch (cluster.clusterType)
				{
					case ClusterType.MultipleInBoth:
					case ClusterType.SplitInCurrent:
					case ClusterType.MergedInCurrent:
						// Complex cluster: if possible, extract simpler clusters for its
						// leading and trailing stanza breaks.
						ExtractStanzaBreaksFromScrVerseCluster(cluster);
						break;

					case ClusterType.AddedToCurrent:
					case ClusterType.OrphansInCurrent:
					case ClusterType.MissingInCurrent:
					case ClusterType.OrphansInRevision:
					{
						// Choose the side with Cluster.CurrentIsSource, the same test that
						// ExtractMissingAddedEmptyParasFromScrVerseCluster applies to these
						// four types. Comparing against AddedToCurrent alone would measure
						// the wrong side of an OrphansInCurrent cluster.
						bool fIsCurrent = Cluster.CurrentIsSource(cluster.clusterType);
						if (cluster.ItemList(fIsCurrent).Count > 1)
						{
							// Simplify added/missing clusters that hold more than one item,
							// so that their empty paragraphs are separated into clusters
							// of their own.
							ExtractMissingAddedEmptyParasFromScrVerseCluster(cluster);
						}
						break;
					}

					case ClusterType.MatchedItems:
						// If a match was made between a stanza break and a non-stanza-break
						// para, the cluster needs to be broken apart.
						ExtractMismatchedParas(cluster);
						break;
				}
			}

			CleanUpClusterListForRemovedItems();
		}
Exemplo n.º 13
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Finds the index to insert at in the other side (i.e. current or revision).
		/// </summary>
		/// <param name="cluster">The cluster that needs to know where it should be inserted.</param>
		/// <returns>index where this cluster should be inserted in the other side (Current or Revision)</returns>
		/// ------------------------------------------------------------------------------------
		private int FindIndexToInsertAtInOther(Cluster cluster)
		{
			Debug.Assert((cluster.itemsCurr.Count == 0 && cluster.itemsRev.Count > 0) ||
				(cluster.itemsRev.Count == 0 && cluster.itemsCurr.Count > 0),
				"This should be a one-sided cluster");

			// The side holding the given cluster's items is the source side; the "other"
			// side is the one we are looking for an index in.
			bool fCurrentHasItems = cluster.itemsCurr.Count > 0;
			bool fOtherIsCurrent = !fCurrentHasItems;

			foreach (Cluster candidate in m_clusterList)
			{
				// Only clusters that have items on the other side can supply an index.
				int cItemsOtherSide = candidate.Items(fOtherIsCurrent);
				if (cItemsOtherSide <= 0)
					continue;

				// The first candidate whose last other-side item starts at or before the
				// start of the given cluster decides: insert right after that item.
				OverlapInfo lastOnOtherSide = candidate.Item(cItemsOtherSide - 1, fOtherIsCurrent);
				if (cluster.verseRefMin >= lastOnOtherSide.verseRefMin)
					return lastOnOtherSide.indexInOwner + 1;
			}

			// No candidate qualified.
			return 0;
		}
Exemplo n.º 14
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Determine if we are extracting the last item on one side of the given cluster
		/// (thus leaving orphans on the other side).
		///  If so, calculate the necessary indexToInsertAtInOther for the cluster.
		/// </summary>
		/// <param name="cluster">The given cluster.</param>
		/// <param name="iItemCurr">The index of the Current item in the cluster.</param>
		/// <param name="iItemRev">The index of the Revision item in the cluster.</param>
		/// <param name="fFwd"><c>true</c> if this is the forward scan; false if backward scan.</param>
		/// <param name="newIndexToInsertAtInOther">Out: The new index to insert at in other.</param>
		/// <returns>true if we are indeed extracting the last item on one side, and calculating the
		/// new index to insert at in other</returns>
		/// ------------------------------------------------------------------------------------
		private bool ExtractingTheLastItemOnOneSide(Cluster cluster, int iItemCurr, int iItemRev, bool fFwd,
			out int newIndexToInsertAtInOther)
		{
			// -1 means "no index determined".
			newIndexToInsertAtInOther = -1;

			// With equally many items on both sides, extracting a pair cannot leave
			// orphans on just one side.
			if (cluster.itemsCurr.Count == cluster.itemsRev.Count)
				return false;

			if (!fFwd)
			{
				// Backward scan: an item is the last remaining one on its side when it is
				// at index 0 or when the slot before it has already been nulled out. The
				// insert index is then the item's own index. The Current side is checked
				// first.
				Debug.Assert(iItemCurr >= 0 && iItemRev >= 0);
				if (iItemCurr == 0 || cluster.itemsCurr[iItemCurr - 1] == null)
				{
					newIndexToInsertAtInOther = cluster.itemsCurr[iItemCurr].indexInOwner;
				}
				else if (iItemRev == 0 || cluster.itemsRev[iItemRev - 1] == null)
				{
					newIndexToInsertAtInOther = cluster.itemsRev[iItemRev].indexInOwner;
				}
			}
			else
			{
				// Forward scan: an item is the last one on its side when it sits at the
				// final index of that side's list. The insert index is then just beyond
				// the item's index. The Current side is checked first.
				int iLastCurr = cluster.itemsCurr.Count - 1;
				int iLastRev = cluster.itemsRev.Count - 1;
				if (iItemCurr == iLastCurr)
				{
					newIndexToInsertAtInOther = cluster.itemsCurr[iItemCurr].indexInOwner + 1;
				}
				else if (iItemRev == iLastRev)
				{
					newIndexToInsertAtInOther = cluster.itemsRev[iItemRev].indexInOwner + 1;
				}
			}

			// Report success only if a non-negative index was determined.
			return newIndexToInsertAtInOther >= 0;
		}
Exemplo n.º 15
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Creates a shallow copy of this cluster and returns it.
		/// </summary>
		/// ------------------------------------------------------------------------------------
		public Cluster Clone()
		{
			Cluster copy = new Cluster();

			// The item lists are duplicated as new lists, but the OverlapInfo objects
			// inside them are shared with this cluster (shallow copy).
			copy.itemsCurr = new List<OverlapInfo>(itemsCurr.ToArray());
			copy.itemsRev = new List<OverlapInfo>(itemsRev.ToArray());

			// The remaining members are copied straight across.
			copy.clusterType = clusterType;
			copy.verseRefMin = verseRefMin;
			copy.verseRefMax = verseRefMax;
			copy.indexToInsertAtInOther = indexToInsertAtInOther;
			copy.sortKey = sortKey;

			return copy;
		}
Exemplo n.º 16
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Make a new cluster for missing/added.
		/// </summary>
		/// <param name="insertInOtherIndex">Index where added/missing cluster should be inserted in other.</param>
		/// <param name="iItem">The index of the item.</param>
		/// <param name="cluster">The cluster.</param>
		/// <param name="fIsCurrent">if set to <c>true</c> extract from the current side;
		/// <c>false</c> extract from the revision side.</param>
		/// ------------------------------------------------------------------------------------
		private void AddMissingAddedCluster(int insertInOtherIndex, int iItem, Cluster cluster, bool fIsCurrent)
		{
			// An item taken from the Current side becomes an "added" cluster; one taken
			// from the Revision side becomes a "missing" cluster.
			Cluster oneSided = new Cluster();
			if (fIsCurrent)
				oneSided.clusterType = ClusterType.AddedToCurrent;
			else
				oneSided.clusterType = ClusterType.MissingInCurrent;

			// The new cluster inherits the reference range of the source cluster and is
			// given the supplied insertion index.
			oneSided.indexToInsertAtInOther = insertInOtherIndex;
			oneSided.verseRefMax = cluster.verseRefMax;
			oneSided.verseRefMin = cluster.verseRefMin;
			oneSided.ItemList(fIsCurrent).Add(cluster.Item(iItem, fIsCurrent));
			m_clusterList.Add(oneSided);

			// Null the slot in the source cluster; this marks the item for later deletion.
			if (!fIsCurrent)
			{
				cluster.itemsRev[iItem] = null;
			}
			else
			{
				cluster.itemsCurr[iItem] = null;
			}
		}
Exemplo n.º 17
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Adds the source item of the given cluster to this cluster's source items.
		/// This is valid only when the given cluster is 'similar' to this one (as reported by
		/// IsSimilar), which is intended for Missing/Added cluster types.
		/// </summary>
		/// <param name="cluster">The cluster whose single source item is merged into this
		/// cluster.</param>
		/// ------------------------------------------------------------------------------------
		public void MergeSourceItems(Cluster cluster)
		{
			// Merging is only meaningful between 'similar' clusters
			Debug.Assert(IsSimilar(cluster));
			// The given cluster normally has exactly one source item; that is all we
			// accommodate for now
			Debug.Assert(cluster.SourceItems.Count == 1);

			OverlapInfo itemToMerge = (OverlapInfo)cluster.SourceItems[0];
			AddSourceItem(itemToMerge);
		}
Exemplo n.º 18
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Groups the OverlapInfo proxies of the Current and the Revision into basic overlap
		/// clusters. Each cluster holds every proxy reachable from a starting proxy by
		/// following the overlappedItemsInOther links. This method calls
		/// FindOverlappedPairs() itself before grouping. The resulting clusters, stored in
		/// m_clusterList, have only their reference range and item lists set; their types
		/// and insert indices still need to be determined.
		/// </summary>
		/// ------------------------------------------------------------------------------------
		protected void CreateBasicOverlapClusters()
		{
			// Establish which proxies overlap one another before grouping them
			FindOverlappedPairs();

			// Collects every cluster discovered below
			List<Cluster> foundClusters = new List<Cluster>();

			// Work from expendable copies of the master lists: proxies are removed as soon
			// as they are grouped, which guarantees the outer loop terminates even when the
			// lists are out of order
			List<OverlapInfo> remainingRev = new List<OverlapInfo>(m_proxyListRev.ToArray());
			List<OverlapInfo> remainingCurr = new List<OverlapInfo>(m_proxyListCurr.ToArray());

			while (remainingRev.Count > 0 || remainingCurr.Count > 0)
			{
				// Choose the proxy that seeds the next cluster
				OverlapInfo seed;
				if (remainingCurr.Count == 0)
				{
					seed = remainingRev[0];
				}
				else if (remainingRev.Count == 0)
				{
					seed = remainingCurr[0];
				}
				else
				{
					// Both sides have proxies left: take the one that starts earlier
					// (on a tie the Current one is used; either would do)
					OverlapInfo nextRev = remainingRev[0];
					OverlapInfo nextCurr = remainingCurr[0];
					seed = (nextRev.verseRefMin < nextCurr.verseRefMin) ? nextRev : nextCurr;
				}

				// Breadth-first walk over the overlap relationships, starting at the seed.
				//  pending       - proxies waiting to be processed
				//  alreadyQueued - everything ever enqueued, so nothing is enqueued twice
				//                  (which would otherwise make the walk endless)
				//  members       - proxies (Curr and Rev alike) gathered for this cluster
				Queue<OverlapInfo> pending = new Queue<OverlapInfo>();
				List<OverlapInfo> alreadyQueued = new List<OverlapInfo>();
				List<OverlapInfo> members = new List<OverlapInfo>();

				pending.Enqueue(seed);
				alreadyQueued.Add(seed);

				while (pending.Count > 0)
				{
					OverlapInfo proxy = pending.Dequeue();

					// This proxy is now used up; drop it from the working list of its side
					(proxy.bookIsFromRev ? remainingRev : remainingCurr).Remove(proxy);

					// Queue every overlapping proxy that has not been queued before
					foreach (OverlapInfo neighbor in proxy.overlappedItemsInOther)
					{
						if (alreadyQueued.Contains(neighbor))
							continue;

						pending.Enqueue(neighbor);
						alreadyQueued.Add(neighbor);
					}

					members.Add(proxy);
				}

				// Turn the gathered proxies into a cluster
				Cluster cluster = new Cluster();
				foreach (OverlapInfo member in members)
				{
					if (cluster.verseRefMin != 0)
					{
						// Widen the existing range to take in this member
						cluster.verseRefMin = Math.Min(cluster.verseRefMin, member.verseRefMin);
						cluster.verseRefMax = Math.Max(cluster.verseRefMax, member.verseRefMax);
					}
					else
					{
						// A min of zero means the range has not been set yet
						cluster.verseRefMin = member.verseRefMin;
						cluster.verseRefMax = member.verseRefMax;
					}

					// File the member under the side it belongs to
					(member.bookIsFromRev ? cluster.itemsRev : cluster.itemsCurr).Add(member);
				}

				foundClusters.Add(cluster);
			}

			// Publish the result
			m_clusterList = foundClusters;
		}
Exemplo n.º 19
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Attempts to simplify the complex ScrVerse clusters (MultipleInBoth, SplitInCurrent
		/// and MergedInCurrent) by extracting correlated pairs from them. Clusters that
		/// contain a verse bridge difference, or that do not span a paragraph break, are left
		/// alone. The cluster list is cleaned up for removed items afterwards.
		/// </summary>
		/// <param name="scrVersesCurr">The list of Current ScrVerses.</param>
		/// <param name="scrVersesRev">The list of Revision ScrVerses.</param>
		/// <param name="correlationThreshold">The correlation threshold.</param>
		/// ------------------------------------------------------------------------------------
		private void SimplifyComplexScrVerseClusters(List<ScrVerse> scrVersesCurr,
			List<ScrVerse> scrVersesRev, double correlationThreshold)
		{
			// Iterate over a snapshot, because the master list will likely have items
			// added and removed while we work
			Cluster[] snapshot = m_clusterList.ToArray();

			for (int i = 0; i < snapshot.Length; i++)
			{
				Cluster candidate = snapshot[i];

				// Only the complex cluster types are candidates for simplification
				bool isComplexType =
					candidate.clusterType == ClusterType.MultipleInBoth ||
					candidate.clusterType == ClusterType.SplitInCurrent ||
					candidate.clusterType == ClusterType.MergedInCurrent;
				if (!isComplexType)
					continue;

				// We don't simplify a complex cluster caused by a network of verse
				// bridge overlaps
				if (candidate.ContainsVerseBridgeDifference)
					continue;

				if (!candidate.SpansParaBreak)
					continue;

				// Work on the entry in the master list (at the same index as the snapshot
				// entry), extracting simpler clusters where possible
				ExtractCorrelatedPairsFromScrVerseCluster(m_clusterList[i], scrVersesCurr,
					scrVersesRev, correlationThreshold);
			}

			CleanUpClusterListForRemovedItems();
		}
Exemplo n.º 20
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Groups the OverlapInfo proxies of the Current and the Revision into basic overlap
		/// clusters, working through both lists in order and building one cluster at a time.
		/// A proxy joins the cluster under construction only if CanBeIncluded accepts it;
		/// otherwise the cluster is closed and a new one is started. The resulting clusters,
		/// stored in m_clusterList, still need their types and insert indices determined.
		/// </summary>
		/// ------------------------------------------------------------------------------------
		private void CreateBasicAdjacentOverlapClusters()
		{
			// Collects every cluster discovered below
			List<Cluster> foundClusters = new List<Cluster>();

			// Work from expendable copies of the master lists: each proxy is removed once
			// it has been placed, which guarantees the loop terminates even when the lists
			// are out of order
			List<OverlapInfo> remainingRev = new List<OverlapInfo>(m_proxyListRev.ToArray());
			List<OverlapInfo> remainingCurr = new List<OverlapInfo>(m_proxyListCurr.ToArray());

			// The cluster currently under construction; null when none is open
			Cluster openCluster = null;

			while (remainingRev.Count > 0 || remainingCurr.Count > 0)
			{
				// The proxy handled on this pass: the head of either working list
				OverlapInfo next;

				if (remainingCurr.Count == 0)
				{
					next = remainingRev[0];
				}
				else if (remainingRev.Count == 0)
				{
					next = remainingCurr[0];
				}
				else
				{
					OverlapInfo headRev = remainingRev[0];
					OverlapInfo headCurr = remainingCurr[0];

					if (openCluster != null)
					{
						// Prefer whichever head can join the open cluster, Revision first
						if (CanBeIncluded(openCluster, headRev))
						{
							next = headRev;
						}
						else if (CanBeIncluded(openCluster, headCurr))
						{
							next = headCurr;
						}
						else
						{
							// Neither head fits: close the open cluster and go around
							// again to start a fresh one
							foundClusters.Add(openCluster);
							openCluster = null;
							continue;
						}
					}
					else if (headRev.isStanzaBreak != headCurr.isStanzaBreak)
					{
						// Starting a new cluster and exactly one head is a stanza break:
						// take the stanza break
						next = headRev.isStanzaBreak ? headRev : headCurr;
					}
					else
					{
						// Starting a new cluster: take the head that starts earlier
						// (on a tie the Current one is used; either would do)
						next = (headRev.verseRefMin < headCurr.verseRefMin) ? headRev : headCurr;
					}
				}

				// If a cluster is open but the chosen proxy cannot join it, close it
				if (openCluster != null && !CanBeIncluded(openCluster, next))
				{
					foundClusters.Add(openCluster);
					openCluster = null;
				}

				// Open a new cluster when needed, then place the proxy
				if (openCluster == null)
					openCluster = new Cluster();
				AddItemToCluster(openCluster, next);

				// The proxy has been placed; drop it from the working list of its side
				(next.bookIsFromRev ? remainingRev : remainingCurr).Remove(next);
			}

			// Don't lose the cluster that was still open when the proxies ran out
			if (openCluster != null)
			{
				Debug.Assert(openCluster.itemsCurr.Count > 0 || openCluster.itemsRev.Count > 0);
				foundClusters.Add(openCluster);
			}

			// Publish the result
			m_clusterList = foundClusters;
		}
Exemplo n.º 21
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Test helper that checks a cluster's reference range, type, insert index and
		/// items against the expected values. Items are verified only for the ScrSection
		/// and ScrVerse kinds of cluster.
		/// </summary>
		/// <param name="cluster">The cluster to verify.</param>
		/// <param name="refMin">The expected minimum reference.</param>
		/// <param name="refMax">The expected maximum reference.</param>
		/// <param name="type">The expected cluster type.</param>
		/// <param name="expectedItemsCurr">The expected items in the Current.</param>
		/// <param name="expectedItemsRev">The expected items in the Revision.</param>
		/// <param name="indexToInsertAtInOther">The expected index to insert at in other.</param>
		/// <param name="kindOfCluster">The kind of cluster, which selects how the items are
		/// verified.</param>
		/// ------------------------------------------------------------------------------------
		private void VerifyCluster(Cluster cluster, int refMin, int refMax, ClusterType type,
			object expectedItemsCurr, object expectedItemsRev, int indexToInsertAtInOther,
			ClusterKind kindOfCluster)
		{
			// Reference range and type
			Assert.AreEqual(refMin, cluster.verseRefMin);
			Assert.AreEqual(refMax, cluster.verseRefMax);
			Assert.AreEqual(type, cluster.clusterType);

			// Insert position in the other side
			Assert.AreEqual(indexToInsertAtInOther, cluster.indexToInsertAtInOther);

			// Items, checked with the verifier that matches the kind of cluster
			if (kindOfCluster == ClusterKind.ScrSection)
			{
				VerifySectionClusterItems(expectedItemsCurr, cluster.itemsCurr, kindOfCluster);
				VerifySectionClusterItems(expectedItemsRev, cluster.itemsRev, kindOfCluster);
			}
			else if (kindOfCluster == ClusterKind.ScrVerse)
			{
				VerifyScrVerseClusterItems(expectedItemsCurr, cluster.itemsCurr, kindOfCluster);
				VerifyScrVerseClusterItems(expectedItemsRev, cluster.itemsRev, kindOfCluster);
			}
		}
Exemplo n.º 22
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Determines whether a given overlap info can be included in the given cluster.
		/// To be included, it must meet both of the following criteria:
		///  * its reference range overlaps the reference range of the cluster, and
		///  * if the cluster already holds items from the same side (Current or Revision),
		///    it has the same paragraph owner as the last of those items.
		/// </summary>
		/// <param name="cluster">The cluster.</param>
		/// <param name="oi">The overlap info.</param>
		/// <returns><c>true</c> if the specified OverlapInfo can be included in the cluster</returns>
		/// ------------------------------------------------------------------------------------
		private bool CanBeIncluded(Cluster cluster, OverlapInfo oi)
		{
			// No overlap if oi lies completely before the cluster range, or the cluster
			// range lies completely before oi
			if (oi.verseRefMax < cluster.verseRefMin || cluster.verseRefMax < oi.verseRefMin)
				return false;

			// The ranges overlap. Now compare against the items already in the cluster
			// that come from the same side as oi.
			IList<OverlapInfo> sameSideItems = oi.bookIsFromRev ? cluster.itemsRev : cluster.itemsCurr;
			if (sameSideItems.Count == 0)
				return true; // nothing on this side yet, so there is no owner to conflict with

			// oi must share its paragraph owner with the most recently added item
			OverlapInfo lastOnSameSide = sameSideItems[sameSideItems.Count - 1];
			if (oi.myParaOwner != lastOnSameSide.myParaOwner)
				return false;

			return true;
		}
Exemplo n.º 23
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// A helper method for section cluster tests.
		/// Verifies the contents of the given Cluster without requiring the caller to supply
		/// an indexToInsertAtInOther (-1 is passed on to the full overload). Missing/Added
		/// clusters need that index, so this overload asserts that the cluster is not one
		/// of them.
		/// </summary>
		/// <param name="cluster">The cluster to verify.</param>
		/// <param name="refMin">The expected minimum reference.</param>
		/// <param name="refMax">The expected maximum reference.</param>
		/// <param name="type">The expected cluster type.</param>
		/// <param name="expectedItemsCurr">The expected items in the Current.</param>
		/// <param name="expectedItemsRev">The expected items in the Revision.</param>
		/// ------------------------------------------------------------------------------------
		private void VerifySectionCluster(Cluster cluster, int refMin, int refMax, ClusterType type,
			object expectedItemsCurr, object expectedItemsRev)
		{
			// This overload is not meant for Missing/Added clusters
			bool isMissingOrAdded =
				cluster.clusterType == ClusterType.MissingInCurrent ||
				cluster.clusterType == ClusterType.AddedToCurrent;
			Assert.IsTrue(!isMissingOrAdded,
				"Missing/Added clusters must be verified by passing in the indexToInsertAtInOther parameter.");

			// Delegate to the full overload, with -1 as the insert index
			VerifySectionCluster(cluster, refMin, refMax, type, expectedItemsCurr, expectedItemsRev, -1);
		}
Exemplo n.º 24
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Adds the given overlap info to the cluster's Current or Revision item list,
		/// according to its book, and widens the cluster's reference range to take it in.
		/// An item whose book is neither Current nor Revision is not added to either list,
		/// although the reference range is still updated.
		/// </summary>
		/// <param name="cluster">The cluster to add to.</param>
		/// <param name="oi">The overlap info to add.</param>
		/// ------------------------------------------------------------------------------------
		private void AddItemToCluster(Cluster cluster, OverlapInfo oi)
		{
			// File the item under the side it belongs to
			if (oi.myBook == OverlapInfo.kCurrent)
			{
				cluster.itemsCurr.Add(oi);
			}
			else if (oi.myBook == OverlapInfo.kRevision)
			{
				cluster.itemsRev.Add(oi);
			}

			// Widen the reference range. A min of zero means the range has not been
			// set yet, so the item's min is taken unconditionally in that case.
			bool minNotYetSet = cluster.verseRefMin == 0;
			if (minNotYetSet || oi.verseRefMin < cluster.verseRefMin)
			{
				cluster.verseRefMin = oi.verseRefMin;
			}
			if (oi.verseRefMax > cluster.verseRefMax)
			{
				cluster.verseRefMax = oi.verseRefMax;
			}
		}