/// <summary>
/// Computes the Euclidean distance between two positions.
/// </summary>
/// <param name="p1">The first position.</param>
/// <param name="p2">The second position.</param>
/// <returns>
/// The square root of the summed squared per-dimension differences. A dimension that is present in
/// only one of the two positions is treated as having the value 0 in the other.
/// </returns>
public double MeasureDistance(CoordinateVector p1, CoordinateVector p2)
{
    KeyValuePair<ulong,double>[] coordsA = p1.CoordArray;
    KeyValuePair<ulong,double>[] coordsB = p2.CoordArray;

    // Cache the lengths; they are consulted on every step of the walk below.
    int countA = coordsA.Length;
    int countB = coordsB.Length;

    double sumOfSquares = 0.0;
    int a = 0;
    int b = 0;

    // Walk both arrays in step, pairing up elements that share a key.
    // NOTE(review): the pairing relies on both arrays being ordered by ascending key - presumably
    // guaranteed by CoordinateVector; confirm.
    while(a < countA && b < countB)
    {
        KeyValuePair<ulong,double> itemA = coordsA[a];
        KeyValuePair<ulong,double> itemB = coordsB[b];

        if(itemA.Key == itemB.Key)
        {
            // Dimension present in both positions. No abs() needed as the difference is squared.
            double delta = itemA.Value - itemB.Value;
            sumOfSquares += delta * delta;
            a++;
            b++;
        }
        else if(itemA.Key < itemB.Key)
        {
            // Dimension missing from p2; its coordinate there is taken to be 0.
            sumOfSquares += itemA.Value * itemA.Value;
            a++;
        }
        else
        {
            // Dimension missing from p1; its coordinate there is taken to be 0.
            sumOfSquares += itemB.Value * itemB.Value;
            b++;
        }
    }

    // At most one of the two loops below does any work: whatever is left over in one array
    // has no counterpart in the other, so every remaining element is a mismatch against 0.
    // This also covers the cases where one or both arrays were empty to begin with.
    for(; b < countB; b++)
    {
        sumOfSquares += coordsB[b].Value * coordsB[b].Value;
    }

    for(; a < countA; a++)
    {
        sumOfSquares += coordsA[a].Value * coordsA[a].Value;
    }

    return Math.Sqrt(sumOfSquares);
}
/// <summary>
/// Tests if the Euclidean distance between two positions is less than some threshold.
///
/// A simple way of implementing this method would be to calculate the distance between the
/// two coordinates and test if it is less than the threshold. However, that approach requires that all of the
/// elements in both CoordinateVectors be fully compared. We can improve performance in the general case
/// by testing if the threshold has been passed after each vector element comparison, thus allowing an early exit
/// from the method for many calls. Further to this, we can begin comparing from the ends of the vectors where
/// differences are most likely to occur.
/// </summary>
/// <param name="p1">The first position.</param>
/// <param name="p2">The second position.</param>
/// <param name="threshold">
/// The distance to test against. A distance is never negative, so a zero or negative threshold
/// always yields false.
/// </param>
/// <returns>True if the distance between the two positions is strictly less than the threshold; otherwise false.</returns>
public bool MeasureDistance(CoordinateVector p1, CoordinateVector p2, double threshold)
{
    KeyValuePair<ulong,double>[] arr1 = p1.CoordArray;
    KeyValuePair<ulong,double>[] arr2 = p2.CoordArray;

    // Store these heavily used values locally.
    int arr1Length = arr1.Length;
    int arr2Length = arr2.Length;

    // No distance can be below a zero or negative threshold. This must be tested before squaring:
    // squaring discards the sign, so a threshold of e.g. -5 would otherwise be treated as 25.
    if(threshold <= 0.0)
    {
        return false;
    }

    // Instead of calculating the euclidean distance we calculate distance squared (we skip the final sqrt
    // part of the formula). Comparing against the squared threshold obviates the need to take the square
    // root when comparing our accumulating calculated distance with the threshold.
    double thresholdSquared = threshold * threshold;

    //--- Test for special cases.
    if(0 == arr1Length && 0 == arr2Length)
    {
        // Both arrays are empty. No disparities, therefore the distance is zero.
        return 0.0 < thresholdSquared;
    }

    // Accumulates the squared distance.
    double distance = 0.0;

    if(0 == arr1Length)
    {
        // All arr2 elements are mismatches.
        // p1 doesn't specify a value in these dimensions therefore we take its position to be 0 in all of them.
        for(int i=0; i<arr2Length; i++)
        {
            distance += arr2[i].Value * arr2[i].Value;
        }
        return distance < thresholdSquared;
    }

    if(0 == arr2Length)
    {
        // All arr1 elements are mismatches.
        // p2 doesn't specify a value in these dimensions therefore we take its position to be 0 in all of them.
        for(int i=0; i<arr1Length; i++)
        {
            distance += arr1[i].Value * arr1[i].Value;
        }
        return distance < thresholdSquared;
    }

    //----- Both arrays contain elements. Compare the contents starting from the ends where the greatest
    // discrepancies between coordinates are expected to occur. Generally this should result in fewer element
    // comparisons before the threshold is passed and we exit the method.
    int arr1Idx = arr1Length - 1;
    int arr2Idx = arr2Length - 1;

    while(arr1Idx >= 0 && arr2Idx >= 0)
    {
        KeyValuePair<ulong,double> elem1 = arr1[arr1Idx];
        KeyValuePair<ulong,double> elem2 = arr2[arr2Idx];

        if(elem2.Key > elem1.Key)
        {
            // p1 doesn't specify a value in this dimension therefore we take its position to be 0.
            distance += elem2.Value * elem2.Value;

            // Move to the next element in arr2.
            arr2Idx--;
        }
        else if(elem1.Key == elem2.Key)
        {
            // Matching elements. Note that abs() isn't required because we square the result.
            double tmp = elem1.Value - elem2.Value;
            distance += tmp * tmp;

            // Move to the next element in both arrays.
            arr1Idx--;
            arr2Idx--;
        }
        else // elem1.Key > elem2.Key
        {
            // p2 doesn't specify a value in this dimension therefore we take its position to be 0.
            distance += elem1.Value * elem1.Value;

            // Move to the next element in arr1.
            arr1Idx--;
        }

        // Test the threshold; the accumulated value only ever grows, so we can stop as soon as it is reached.
        if(distance >= thresholdSquared)
        {
            return false;
        }
    }

    // One (or both) of the arrays is exhausted. Any elements remaining in the other array are mismatches.
    // At most one of these two loops does any work.
    for(int i=arr2Idx; i > -1; i--)
    {
        distance += arr2[i].Value * arr2[i].Value;
    }

    for(int i=arr1Idx; i > -1; i--)
    {
        distance += arr1[i].Value * arr1[i].Value;
    }

    return distance < thresholdSquared;
}
/// <summary>
/// Determines whether two positions are closer together than a given threshold.
///
/// The distance is accumulated one dimension at a time. A dimension present in both positions contributes the
/// absolute difference of the two values scaled by the match coefficient; a dimension present in only one
/// position contributes the mismatch constant plus the absolute value scaled by the mismatch coefficient.
///
/// Rather than computing the complete distance and then comparing, the running total is checked against the
/// threshold after each comparison so that many calls can return early. The walk starts at the ends of the
/// arrays, where differences are expected to be most likely.
/// </summary>
/// <param name="p1">The first position.</param>
/// <param name="p2">The second position.</param>
/// <param name="threshold">The distance to test against.</param>
/// <returns>True if the accumulated distance is strictly less than the threshold; otherwise false.</returns>
public bool MeasureDistance(CoordinateVector p1, CoordinateVector p2, double threshold)
{
    KeyValuePair<ulong,double>[] coordsA = p1.CoordArray;
    KeyValuePair<ulong,double>[] coordsB = p2.CoordArray;

    // Cache the lengths; they are used repeatedly below.
    int countA = coordsA.Length;
    int countB = coordsB.Length;

    //--- Special cases: one or both positions have no elements.
    if(0 == countA && 0 == countB)
    {
        // Nothing to compare, so the distance is zero.
        return 0.0 < threshold;
    }

    double total = 0.0;

    if(0 == countA)
    {
        // Every element of p2 is a mismatch against an implied 0 in p1.
        for(int k=0; k<countB; k++)
        {
            total += Math.Abs(coordsB[k].Value);
        }

        // Apply the per-element constant and the coefficient once, to the summed magnitudes.
        total = (_mismatchDistanceConstant * countB) + (total * _mismatchDistanceCoeff);
        return total < threshold;
    }

    if(0 == countB)
    {
        // Every element of p1 is a mismatch against an implied 0 in p2.
        for(int k=0; k<countA; k++)
        {
            total += Math.Abs(coordsA[k].Value);
        }

        // Apply the per-element constant and the coefficient once, to the summed magnitudes.
        total = (_mismatchDistanceConstant * countA) + (total * _mismatchDistanceCoeff);
        return total < threshold;
    }

    //--- Both positions have elements. Walk backwards from the highest index of each array.
    // NOTE(review): the early exit below presumes each contribution is non-negative, i.e. that the
    // mismatch constant and both coefficients are non-negative - confirm against where they are set.
    int a = countA - 1;
    int b = countB - 1;

    while(a >= 0 && b >= 0)
    {
        KeyValuePair<ulong,double> itemA = coordsA[a];
        KeyValuePair<ulong,double> itemB = coordsB[b];

        if(itemA.Key == itemB.Key)
        {
            // Dimension present in both positions.
            total += Math.Abs(itemA.Value - itemB.Value) * _matchDistanceCoeff;
            a--;
            b--;
        }
        else if(itemB.Key > itemA.Key)
        {
            // Dimension missing from p1; its coordinate there is taken to be 0.
            total += _mismatchDistanceConstant + (Math.Abs(itemB.Value) * _mismatchDistanceCoeff);
            b--;
        }
        else
        {
            // Dimension missing from p2; its coordinate there is taken to be 0.
            total += _mismatchDistanceConstant + (Math.Abs(itemA.Value) * _mismatchDistanceCoeff);
            a--;
        }

        // Stop as soon as the running total reaches the threshold.
        if(total >= threshold)
        {
            return false;
        }
    }

    // One (or both) arrays are used up. Whatever remains in the other array has no counterpart,
    // so each remaining element is a mismatch. At most one of these loops does any work.
    for(int k=b; k > -1; k--)
    {
        total += _mismatchDistanceConstant + (Math.Abs(coordsB[k].Value) * _mismatchDistanceCoeff);
    }

    for(int k=a; k > -1; k--)
    {
        total += _mismatchDistanceConstant + (Math.Abs(coordsA[k].Value) * _mismatchDistanceCoeff);
    }

    return total < threshold;
}