// McIlroy-Hunt diff algorithm.
// Adapted from the Smalltalk code of Mario I. Wolczko by Ned Konz;
// updates by Tye McQueen, http://perlmonks.org/?node=tye
//
// Builds a table that maps each element of aCollection found at indexes
// startIndex..endIndex (both inclusive) to the list of positions at which
// that element occurs inside the range. Positions are appended in ascending
// order and every list is reversed before the table is returned.
//
// Keys are hashed and compared through the hashcoder and comparer members;
// the Perl original took a key-generating callback for this purpose.
// A range with endIndex < startIndex produces an empty table.
Hashtable WithPositionsOfInInterval(IList aCollection, int startIndex, int endIndex)
{
    Hashtable d = new Hashtable(hashcoder, comparer);

    for (int index = startIndex; index <= endIndex; index++)
    {
        object element = aCollection[index];

        // Single lookup per element: the Hashtable indexer yields null for a
        // key that is not present, and only non-null lists are stored here,
        // so null reliably means "first occurrence".
        IntList positions = (IntList)d[element];
        if (positions == null)
        {
            positions = new IntList();
            d[element] = positions;
        }
        positions.Add(index);
    }

    // Flip each list so that the positions appended last come first.
    foreach (IntList collected in d.Values)
    {
        collected.Reverse();
    }

    return d;
}
// Places value into array at the slot found by binary search (the search
// presumes the entries are in ascending order).
//   - value larger than the last entry, or array empty: value is appended
//     and the index of the new last entry is returned.
//   - value already present in the searched range: nothing changes and -1
//     is returned.
//   - otherwise: the entry at the insertion point is overwritten with value
//     and that index is returned.
// high is the upper bound of the search; zero or a negative number means
// "search up to the last entry".
private int _replaceNextLargerWith(IntList array, int value, int high)
{
    int upper = high;
    if (upper <= 0)
    {
        upper = array.Count - 1;
    }

    // upper is -1 here only when the list is empty; test that first so the
    // last-entry read is never attempted on an empty list.
    bool listIsEmpty = upper == -1;
    if (listIsEmpty || array[array.Count - 1] < value)
    {
        array.Add(value);
        return array.Count - 1;
    }

    int lower = 0;
    while (lower <= upper)
    {
        int middle = (upper + lower) / 2;
        int probe = array[middle];

        if (probe == value)
        {
            return -1;
        }

        if (probe < value)
        {
            lower = middle + 1;
        }
        else
        {
            upper = middle - 1;
        }
    }

    // lower now addresses the first entry greater than value.
    array[lower] = value;
    return lower;
}
// Disabled in the original source, kept for reference:
// void prepare(IList list) {
//     prepared = _withPositionsOfInInterval(list, 0, list.Count-1);
//     preparedlist = list;
// }

// Converts the match vector produced by LongestCommonSubsequence into two
// parallel index lists: am receives every index of a that has a partner,
// and bm receives, at the same position, the matching index in b.
// Entries equal to -1 in the match vector are skipped.
void LongestCommonSubsequenceIndex(IList a, IList b, out IntList am, out IntList bm)
{
    IntList matches = LongestCommonSubsequence(a, b);

    am = new IntList();
    bm = new IntList();

    // One pass fills both lists; bm[n] is always the partner of am[n].
    for (int indexInA = 0; indexInA < matches.Count; indexInA++)
    {
        int indexInB = matches[indexInA];
        if (indexInB == -1)
        {
            continue;
        }

        am.Add(indexInA);
        bm.Add(indexInB);
    }
}
// Turns the match vector from _longestCommonSubsequence into two parallel
// lists of indexes: am holds the indexes of a that were matched, bm holds
// the corresponding indexes of b. A match-vector entry of -1 marks an
// element of a without a partner and is left out.
private void LCSidx(IList a, IList b, out IntList am, out IntList bm)
{
    IntList matchVector = this._longestCommonSubsequence(a, b);
    IntList matchedInA = new IntList();
    IntList matchedInB = new IntList();

    int position = 0;
    while (position < matchVector.Count)
    {
        int partner = matchVector[position];
        if (partner != -1)
        {
            matchedInA.Add(position);
            matchedInB.Add(partner);
        }
        position++;
    }

    am = matchedInA;
    bm = matchedInB;
}
// Builds the compact diff of a and b as a flat list of index pairs
// (position in a, position in b).
//   - The first pair is always 0,0.
//   - After that the pairs alternate: the position just past a run of
//     matching elements, then the position where the next matching run
//     starts.
//   - When the last pair written does not reach the end of both inputs,
//     the pair a.Count,b.Count is appended.
// The matched index pairs come from LCSidx.
IntList compact_diff(IList a, IList b)
{
    IntList am, bm;
    LCSidx(a, b, out am, out bm);

    IntList cdiff = new IntList();
    int ai = 0, bi = 0;

    // Read cursor into am/bm. Walking the lists with a cursor replaces
    // repeated RemoveAt(0) calls, which shift every remaining entry on each
    // call if IntList is array-backed (presumably it is - confirm).
    int next = 0;

    cdiff.Add(ai);
    cdiff.Add(bi);
    while (true)
    {
        // Consume the run of matches that starts exactly at (ai, bi).
        while (next < am.Count && ai == am[next] && bi == bm[next])
        {
            ++next;
            ++ai;
            ++bi;
        }
        cdiff.Add(ai);
        cdiff.Add(bi);

        if (next >= am.Count)
        {
            break;
        }

        // Jump over the differing stretch to the next matched pair.
        ai = am[next];
        bi = bm[next];
        cdiff.Add(ai);
        cdiff.Add(bi);
    }

    if (ai < a.Count || bi < b.Count)
    {
        cdiff.Add(a.Count);
        cdiff.Add(b.Count);
    }
    return cdiff;
}
// Produces the compact diff of a and b: a flat list of index pairs
// (position in a, position in b). It opens with 0,0; each following pair
// marks, alternately, where a run of matching elements ends and where the
// next run of matching elements begins. If the final pair falls short of
// the end of either input, a.Count,b.Count is appended as a closing pair.
// Matched positions are obtained from LongestCommonSubsequenceIndex.
IntList compact_diff(IList a, IList b)
{
    IntList am, bm;
    LongestCommonSubsequenceIndex(a, b, out am, out bm);

    IntList newCompactDiff = new IntList();
    int ai = 0, bi = 0;
    newCompactDiff.Add(ai);
    newCompactDiff.Add(bi);

    // The match lists are read through a cursor instead of being emptied
    // from the front: RemoveAt(0) would move all remaining entries on every
    // call if IntList stores its items in an array (assumption - confirm).
    int cursor = 0;
    int matchCount = am.Count;

    while (true)
    {
        // Step over matches that continue directly from (ai, bi).
        while (cursor < matchCount && ai == am[cursor] && bi == bm[cursor])
        {
            cursor++;
            ai++;
            bi++;
        }
        newCompactDiff.Add(ai);
        newCompactDiff.Add(bi);

        if (cursor >= matchCount)
        {
            break;
        }

        // The next matched pair starts the following run.
        ai = am[cursor];
        bi = bm[cursor];
        newCompactDiff.Add(ai);
        newCompactDiff.Add(bi);
    }

    if (ai < a.Count || bi < b.Count)
    {
        newCompactDiff.Add(a.Count);
        newCompactDiff.Add(b.Count);
    }
    return newCompactDiff;
}
// Finds the place at which value would normally be inserted into array and
// acts on it:
//   - the place lies past the end (value exceeds the last entry, or the
//     list is empty): value is appended; returns the index of the new entry.
//   - the place already holds value: nothing is changed; returns -1
//     (the Perl original returned undef here).
//   - otherwise: the entry at that place is overwritten with value; returns
//     its index.
// high limits the binary search from above; zero or a negative number means
// "use the last index". The search presumes ascending numeric entries.
// The original Perl module notes that most of its running time (about 75%)
// is spent in this routine, so it is kept deliberately lean.
int ReplaceNextLargerWith(IntList array, int value, int high)
{
    int top = (high <= 0) ? array.Count - 1 : high;

    // Off the end? top is -1 only for an empty list, and that test runs
    // first so the last entry is never read from an empty list.
    if (top == -1 || value > array[array.Count - 1])
    {
        array.Add(value);
        return array.Count - 1;
    }

    // Binary search for the insertion point.
    int bottom = 0;
    while (bottom <= top)
    {
        int mid = (top + bottom) / 2;
        int current = array[mid];

        if (value == current)
        {
            return -1;
        }
        else if (value > current)
        {
            bottom = mid + 1;
        }
        else
        {
            top = mid - 1;
        }
    }

    // bottom is the insertion point: overwrite the next larger entry.
    array[bottom] = value;
    return bottom;
}
// Returns the compact diff of a and b as a flat list of index pairs
// (position in a, position in b). The list starts with 0,0. Every later
// pair is either the position just past a run of matching elements or the
// position at which the next run of matching elements starts, in that
// alternating order. A closing pair a.Count,b.Count is added when the last
// pair written stops short of the end of either input.
private IntList compact_diff(IList a, IList b)
{
    IntList am;
    IntList bm;
    this.LCSidx(a, b, out am, out bm);

    IntList pairs = new IntList();
    int posA = 0;
    int posB = 0;

    // Index of the next unread match. Advancing an index replaces the
    // RemoveAt(0) calls on am/bm, which cost a full shift of the remaining
    // entries each time if IntList is array-backed (assumption - confirm).
    int nextMatch = 0;

    pairs.Add(posA);
    pairs.Add(posB);
    while (true)
    {
        // Swallow matches that are contiguous with the current position.
        while ((nextMatch < am.Count) && (posA == am[nextMatch]) && (posB == bm[nextMatch]))
        {
            nextMatch++;
            posA++;
            posB++;
        }
        pairs.Add(posA);
        pairs.Add(posB);

        if (nextMatch >= am.Count)
        {
            break;
        }

        // Skip ahead to where the next matching run begins.
        posA = am[nextMatch];
        posB = bm[nextMatch];
        pairs.Add(posA);
        pairs.Add(posB);
    }

    if ((posA < a.Count) || (posB < b.Count))
    {
        pairs.Add(a.Count);
        pairs.Add(b.Count);
    }
    return pairs;
}
// Indexes the elements of aCollection at positions start..end (inclusive):
// the returned table maps each distinct element to the list of positions
// where it occurs in that range. Positions are appended in ascending order
// and each list is reversed before returning. The table is created with the
// hashcoder member. If end is smaller than start the table stays empty.
private Hashtable _withPositionsOfInInterval(IList aCollection, int start, int end)
{
    Hashtable positionsByElement = new Hashtable(this.hashcoder);

    for (int i = start; i <= end; i++)
    {
        object key = aCollection[i];

        // One lookup instead of ContainsKey followed by the indexer: a
        // missing key reads back as null, and stored lists are never null.
        IntList occurrences = (IntList)positionsByElement[key];
        if (occurrences == null)
        {
            occurrences = new IntList();
            positionsByElement[key] = occurrences;
        }
        occurrences.Add(i);
    }

    // Reverse every list so the positions added last are read first.
    foreach (IntList positions in positionsByElement.Values)
    {
        positions.Reverse();
    }

    return positionsByElement;
}
// McIlroy-Hunt diff algorithm.
// Adapted from the Smalltalk code of Mario I. Wolczko by Ned Konz;
// updates by Tye McQueen, http://perlmonks.org/?node=tye
//
// Creates a table that maps each element of aCollection to the positions
// it occupies, restricted to the indexes start..end (both inclusive).
// Positions are collected in ascending order and each list is reversed
// before the table is handed back.
//
// The Perl original accepted a key-generating subroutine; in this version
// the hashcoder and comparer members decide how elements are hashed and
// compared. When end is less than start the result is an empty table.
Hashtable _withPositionsOfInInterval(IList aCollection, int start, int end)
{
    Hashtable d = new Hashtable(hashcoder, comparer);

    for (int index = start; index <= end; index++)
    {
        object element = aCollection[index];

        // Look the element up once. The indexer returns null for an unknown
        // key and only non-null lists are ever stored, so null means this
        // is the element's first occurrence in the range.
        IntList seenAt = (IntList)d[element];
        if (seenAt == null)
        {
            seenAt = new IntList();
            d[element] = seenAt;
        }
        seenAt.Add(index);
    }

    // Reverse each position list in place.
    foreach (IntList list in d.Values)
    {
        list.Reverse();
    }

    return d;
}
// Locates the slot where value belongs in array (binary search, so the
// entries are presumed to be in ascending numeric order) and updates it:
//   - slot is past the end, i.e. the list is empty or value is greater
//     than the last entry: append value, return the new last index;
//   - slot already contains value: leave the list alone, return -1
//     (stands in for the undef returned by the Perl original);
//   - any other case: overwrite the slot with value, return its index.
// high is the highest index to search; zero or a negative number selects
// the last index of the list.
// According to the original Perl module this routine accounts for the bulk
// (about 75%) of the running time, hence the minimal implementation.
int _replaceNextLargerWith(IntList array, int value, int high)
{
    if (high <= 0)
    {
        high = array.Count - 1;
    }

    // Off the end? (high == -1 can only mean the list is empty.)
    bool pastTheEnd = high == -1 || array[array.Count - 1] < value;
    if (pastTheEnd)
    {
        array.Add(value);
        return array.Count - 1;
    }

    // Binary search for the insertion point.
    int low = 0;
    while (low <= high)
    {
        int index = (high + low) / 2;
        int found = array[index];

        if (found == value)
        {
            return -1;
        }

        if (found < value)
        {
            low = index + 1;
        }
        else
        {
            high = index - 1;
        }
    }

    // The insertion point is in low: overwrite the next larger entry.
    array[low] = value;
    return low;
}
// Computes a longest common subsequence of a and b (McIlroy-Hunt scheme).
// Returns a list with one entry per element of a: the index of the element
// of b it is paired with, or -1 when it has no partner.
//
// If IsPrepared hands back a position table, that table is used as-is and
// no prefix/suffix pruning takes place (presumably the table was built for
// b ahead of time - confirm against IsPrepared). Otherwise equal elements
// at the start and end of both lists are paired off directly and a table
// is built for the remaining middle part of b.
IntList _longestCommonSubsequence(IList a, IList b)
{
    int aLow = 0;
    int aHigh = a.Count - 1;

    // Start with "no partner" for every element of a.
    IntList result = new IntList();
    for (int n = 0; n < a.Count; n++)
    {
        result.Add(-1);
    }

    Hashtable positionsInB;
    if (!IsPrepared(out positionsInB))
    {
        int bLow = 0;
        int bHigh = b.Count - 1;

        // Pair off the common elements at the beginning...
        while (aLow <= aHigh && bLow <= bHigh && compare(a[aLow], b[bLow]))
        {
            result[aLow] = bLow;
            aLow++;
            bLow++;
        }

        // ...and at the end.
        while (aLow <= aHigh && bLow <= bHigh && compare(a[aHigh], b[bHigh]))
        {
            result[aHigh] = bHigh;
            aHigh--;
            bHigh--;
        }

        // Equivalence classes of positions for what is left of b.
        positionsInB = _withPositionsOfInInterval(b, bLow, bHigh);
    }

    // thresh: threshold list maintained through _replaceNextLargerWith.
    // links[k]: most recent chain node recorded for slot k of thresh.
    IntList thresh = new IntList();
    TrioList links = new TrioList();

    for (int ai = aLow; ai <= aHigh; ai++)
    {
        IntList candidates = (IntList)positionsInB[a[ai]];
        if (candidates == null)
        {
            continue; // a[ai] does not occur in the table
        }

        int k = 0;
        for (int c = 0; c < candidates.Count; c++)
        {
            int bj = candidates[c];

            // Shortcut that applies most of the time: bj fits strictly
            // between the neighbouring thresholds, so slot k is reused
            // without a search.
            bool fitsCurrentSlot = k > 0 && thresh[k] > bj && thresh[k - 1] < bj;
            if (fitsCurrentSlot)
            {
                thresh[k] = bj;
            }
            else
            {
                k = _replaceNextLargerWith(thresh, bj, k);
            }

            // -1 means bj was already present in thresh; nothing to record.
            if (k == -1)
            {
                continue;
            }

            Trio previous = null;
            if (k > 0)
            {
                previous = (Trio)links[k - 1];
            }

            Trio node = new Trio(previous, ai, bj);
            if (k == links.Count)
            {
                links.Add(node);
            }
            else
            {
                links[k] = node;
            }
        }
    }

    // Walk the chain back from the last threshold slot; each node supplies
    // one (index in a, index in b) pair through its b and c members.
    if (thresh.Count > 0)
    {
        Trio tail = (Trio)links[thresh.Count - 1];
        while (tail != null)
        {
            result[tail.b] = tail.c;
            tail = tail.a;
        }
    }

    return result;
}
// Disabled in the original source, kept for reference:
// void prepare(IList list) {
//     prepared = _withPositionsOfInInterval(list, 0, list.Count-1);
//     preparedlist = list;
// }

// Splits the match vector returned by _longestCommonSubsequence into two
// parallel lists. am collects the indexes of a that have a partner; bm
// collects, in the same order, the index in b each of them is paired with.
// Match-vector entries of -1 (no partner) are ignored.
void LCSidx(IList a, IList b, out IntList am, out IntList bm)
{
    IntList match = _longestCommonSubsequence(a, b);

    am = new IntList();
    bm = new IntList();

    // Fill both lists together so that am[n] and bm[n] always belong to
    // the same matched pair.
    for (int i = 0; i < match.Count; i++)
    {
        int partner = match[i];
        if (partner != -1)
        {
            am.Add(i);
            bm.Add(partner);
        }
    }
}
// Computes the compact diff of a and b: a flat list of index pairs
// (position in a, position in b).
//   - It begins with the pair 0,0.
//   - The remaining pairs alternate between the position right after a run
//     of matching elements and the position at which the next matching run
//     begins.
//   - If the last pair does not reach the end of both inputs, the pair
//     a.Count,b.Count closes the list.
// The matched positions are supplied by LCSidx.
IntList compact_diff(IList a, IList b)
{
    IntList am, bm;
    LCSidx(a, b, out am, out bm);

    IntList cdiff = new IntList();
    int ai = 0, bi = 0;

    // Position of the first match not yet consumed. Using a position
    // instead of RemoveAt(0) avoids shifting the rest of am/bm for every
    // consumed match (the shift cost presumes an array-backed IntList -
    // confirm).
    int head = 0;
    int total = am.Count;

    cdiff.Add(ai);
    cdiff.Add(bi);
    for (;;)
    {
        // Run through the matches that line up with (ai, bi).
        while (head < total && ai == am[head] && bi == bm[head])
        {
            ++head;
            ++ai;
            ++bi;
        }
        cdiff.Add(ai);
        cdiff.Add(bi);

        if (head >= total)
        {
            break;
        }

        // Move to the start of the next matching run.
        ai = am[head];
        bi = bm[head];
        cdiff.Add(ai);
        cdiff.Add(bi);
    }

    if (ai < a.Count || bi < b.Count)
    {
        cdiff.Add(a.Count);
        cdiff.Add(b.Count);
    }
    return cdiff;
}
// Finds a longest common subsequence of a and b and reports it as a list
// with one entry per element of a: the index of the paired element of b,
// or -1 if the element of a is unpaired.
//
// When IsPrepared supplies a position table, it is used directly and the
// whole of a is scanned (presumably the table describes b - confirm against
// IsPrepared). Otherwise equal leading and trailing elements are paired off
// first and a table is built for the rest of b.
private IntList _longestCommonSubsequence(IList a, IList b)
{
    Hashtable bMatches;
    int firstA = 0;
    int lastA = a.Count - 1;

    // Every element of a starts out unpaired.
    IntList matchVector = new IntList();
    for (int i = 0; i < a.Count; i++)
    {
        matchVector.Add(-1);
    }

    if (!this.IsPrepared(out bMatches))
    {
        int firstB = 0;
        int lastB = b.Count - 1;

        // Common prefix.
        while ((firstA <= lastA) && (firstB <= lastB) && this.compare(a[firstA], b[firstB]))
        {
            matchVector[firstA] = firstB;
            firstA++;
            firstB++;
        }

        // Common suffix.
        while ((firstA <= lastA) && (firstB <= lastB) && this.compare(a[lastA], b[lastB]))
        {
            matchVector[lastA] = lastB;
            lastA--;
            lastB--;
        }

        bMatches = this._withPositionsOfInInterval(b, firstB, lastB);
    }

    // thresholds: list maintained through _replaceNextLargerWith.
    // chain[slot]: latest Trio recorded for that slot of thresholds.
    IntList thresholds = new IntList();
    ArrayList chain = new ArrayList();

    for (int indexA = firstA; indexA <= lastA; indexA++)
    {
        IntList positions = (IntList)bMatches[a[indexA]];
        if (positions == null)
        {
            continue; // element has no entry in the table
        }

        int slot = 0;
        for (int p = 0; p < positions.Count; p++)
        {
            int indexB = positions[p];

            // Cheap test first: indexB lies strictly between the two
            // neighbouring thresholds, so the current slot is reused.
            bool reuseSlot = (slot > 0) && (thresholds[slot] > indexB) && (thresholds[slot - 1] < indexB);
            if (reuseSlot)
            {
                thresholds[slot] = indexB;
            }
            else
            {
                slot = this._replaceNextLargerWith(thresholds, indexB, slot);
            }

            // -1 signals that indexB was already in thresholds.
            if (slot == -1)
            {
                continue;
            }

            Trio predecessor = null;
            if (slot > 0)
            {
                predecessor = (Trio)chain[slot - 1];
            }

            Trio entry = new Trio(predecessor, indexA, indexB);
            if (slot == chain.Count)
            {
                chain.Add(entry);
            }
            else
            {
                chain[slot] = entry;
            }
        }
    }

    // Follow the chain backwards from the last slot and copy each recorded
    // pair (members b and c of the Trio) into the match vector.
    if (thresholds.Count > 0)
    {
        Trio current = (Trio)chain[thresholds.Count - 1];
        while (current != null)
        {
            matchVector[current.b] = current.c;
            current = current.a;
        }
    }

    return matchVector;
}
// Determines a longest common subsequence of a and b (McIlroy-Hunt scheme)
// and returns it as a match vector: entry i is the index in b of the
// element paired with a[i], or -1 when a[i] has no partner.
//
// A position table delivered by IsPrepared is used unchanged, without any
// pruning (presumably it was computed for b beforehand - confirm against
// IsPrepared). Without one, equal elements at both ends of the lists are
// paired immediately and a table is built for the remaining part of b.
IntList LongestCommonSubsequence(IList a, IList b)
{
    int aStart = 0;
    int aFinish = a.Count - 1;

    // Initialise the match vector to the length of a, all "no partner".
    IntList matchVector = new IntList();
    int remaining = a.Count;
    while (remaining > 0)
    {
        matchVector.Add(-1);
        remaining--;
    }

    Hashtable bMatches;
    if (!IsPrepared(out bMatches))
    {
        int bStart = 0;
        int bFinish = b.Count - 1;

        // First prune off any common elements at the beginning.
        for (; aStart <= aFinish && bStart <= bFinish && compare(a[aStart], b[bStart]); aStart++, bStart++)
        {
            matchVector[aStart] = bStart;
        }

        // Now the end.
        for (; aStart <= aFinish && bStart <= bFinish && compare(a[aFinish], b[bFinish]); aFinish--, bFinish--)
        {
            matchVector[aFinish] = bFinish;
        }

        // Now compute the equivalence classes of positions of elements.
        bMatches = WithPositionsOfInInterval(b, bStart, bFinish);
    }

    // thresh: threshold list updated through ReplaceNextLargerWith.
    // links[k]: latest chain node stored for slot k of thresh.
    IntList thresh = new IntList();
    TrioList links = new TrioList();

    for (int i = aStart; i <= aFinish; i++)
    {
        IntList aimatches = (IntList)bMatches[a[i]];
        if (aimatches == null)
        {
            continue; // a[i] is not in the table
        }

        int k = 0;
        for (int ji = 0; ji < aimatches.Count; ji++)
        {
            int j = aimatches[ji];

            // Optimization: most of the time j falls strictly between the
            // neighbouring thresholds and slot k can be reused directly.
            bool withinNeighbours = k > 0 && thresh[k] > j && thresh[k - 1] < j;
            if (withinNeighbours)
            {
                thresh[k] = j;
            }
            else
            {
                k = ReplaceNextLargerWith(thresh, j, k);
            }

            // -1 means j was already present; there is nothing to link.
            if (k == -1)
            {
                continue;
            }

            Trio before = null;
            if (k > 0)
            {
                before = (Trio)links[k - 1];
            }

            Trio t = new Trio(before, i, j);
            if (k == links.Count)
            {
                links.Add(t);
            }
            else
            {
                links[k] = t;
            }
        }
    }

    // Trace the chain back from the last threshold slot, writing each
    // recorded pair (members b and c of the node) into the match vector.
    if (thresh.Count > 0)
    {
        Trio cursor = (Trio)links[thresh.Count - 1];
        while (cursor != null)
        {
            matchVector[cursor.b] = cursor.c;
            cursor = cursor.a;
        }
    }

    return matchVector;
}
// Returns the compact diff of a and b: a flat list of index pairs
// (position in a, position in b). The list opens with 0,0. The pairs that
// follow mark, in alternation, the position just past a run of matching
// elements and the position where the next matching run starts. When the
// last pair stops before the end of either input, a.Count,b.Count is added
// to close the list. Matches come from LongestCommonSubsequenceIndex.
IntList compact_diff(IList a, IList b)
{
    IntList am, bm;
    LongestCommonSubsequenceIndex(a, b, out am, out bm);

    IntList newCompactDiff = new IntList();
    int ai = 0, bi = 0;
    newCompactDiff.Add(ai);
    newCompactDiff.Add(bi);

    // consumed counts the matches already handled. Counting replaces
    // RemoveAt(0) on am/bm, which would shift all remaining entries per
    // call if IntList keeps its items in an array (assumption - confirm).
    int consumed = 0;

    bool matchesLeft = true;
    while (matchesLeft)
    {
        // Absorb matches that directly continue from (ai, bi).
        while (consumed < am.Count && ai == am[consumed] && bi == bm[consumed])
        {
            ++consumed;
            ++ai;
            ++bi;
        }
        newCompactDiff.Add(ai);
        newCompactDiff.Add(bi);

        matchesLeft = consumed < am.Count;
        if (matchesLeft)
        {
            // Advance to the first element of the next matching run.
            ai = am[consumed];
            bi = bm[consumed];
            newCompactDiff.Add(ai);
            newCompactDiff.Add(bi);
        }
    }

    if (ai < a.Count || bi < b.Count)
    {
        newCompactDiff.Add(a.Count);
        newCompactDiff.Add(b.Count);
    }
    return newCompactDiff;
}