/// <summary>
/// Compares two serialized keys, one configured key specification at a time.
/// </summary>
/// <remarks>
/// The number of leading bytes reported by <c>WritableUtils.DecodeVIntSize</c> is skipped in
/// each buffer. When no key specification is configured, the remaining bytes are handed to
/// <c>CompareBytes</c> as a whole.
/// </remarks>
/// <param name="b1">Buffer containing the first key.</param>
/// <param name="s1">Offset of the first key inside <paramref name="b1"/>.</param>
/// <param name="l1">Length of the first key.</param>
/// <param name="b2">Buffer containing the second key.</param>
/// <param name="s2">Offset of the second key inside <paramref name="b2"/>.</param>
/// <param name="l2">Length of the second key.</param>
/// <returns>
/// The first non-zero value produced by <c>CompareByteSequence</c> for a key specification,
/// or 0 when every specification compares equal.
/// </returns>
public override int Compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
{
    int prefixFirst = WritableUtils.DecodeVIntSize(b1[s1]);
    int prefixSecond = WritableUtils.DecodeVIntSize(b2[s2]);

    // Bounds of the key bytes once the leading size bytes are skipped.
    int fromFirst = s1 + prefixFirst;
    int toFirst = s1 + l1;
    int fromSecond = s2 + prefixSecond;
    int toSecond = s2 + l2;

    IList<KeyFieldHelper.KeyDescription> specs = keyFieldHelper.KeySpecs();
    if (specs.Count == 0)
    {
        // Nothing configured: compare everything after the prefix.
        return CompareBytes(b1, fromFirst, l1 - prefixFirst, b2, fromSecond, l2 - prefixSecond);
    }

    int[] wordsFirst = keyFieldHelper.GetWordLengths(b1, fromFirst, toFirst);
    int[] wordsSecond = keyFieldHelper.GetWordLengths(b2, fromSecond, toSecond);

    foreach (KeyFieldHelper.KeyDescription spec in specs)
    {
        int beginFirst = keyFieldHelper.GetStartOffset(b1, fromFirst, toFirst, wordsFirst, spec);
        int finishFirst = keyFieldHelper.GetEndOffset(b1, fromFirst, toFirst, wordsFirst, spec);
        int beginSecond = keyFieldHelper.GetStartOffset(b2, fromSecond, toSecond, wordsSecond, spec);
        int finishSecond = keyFieldHelper.GetEndOffset(b2, fromSecond, toSecond, wordsSecond, spec);

        int outcome = CompareByteSequence(b1, beginFirst, finishFirst, b2, beginSecond, finishSecond, spec);
        if (outcome != 0)
        {
            // The first differing key field decides the ordering.
            return outcome;
        }
    }

    return 0;
}
/// <summary>Tests the key-field-helper's <c>GetWordLengths</c>.</summary>
/// <remarks>
/// Judging by the expected arrays below, the first entry of a result is the number of words
/// and the following entries are the individual word lengths.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestGetWordLengths()
{
    KeyFieldHelper helper = new KeyFieldHelper();
    helper.SetKeyFieldSeparator("\t");

    // test getWordLengths with unspecified key-specifications
    string input = "hi";
    int[] result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 0, 2);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 1 }));

    // set the key specs
    helper.SetKeyFieldSpec(1, 2);

    // test getWordLengths with 3 words
    input = "hi\thello there";
    result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 0, input.Length);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 2, 2, 11 }));

    // test getWordLengths with 4 words but with a different separator
    helper.SetKeyFieldSeparator(" ");
    input = "hi hello\tthere you";
    result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 0, input.Length);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 3, 2, 11, 3 }));

    // test with non zero start index
    // NOTE(review): bytes [10, 33) of this input end with the full word "there" (5 bytes),
    // while the last expected entry is 3 - confirm how Equals treats the trailing entry.
    input = "hi hello there you where me there";
    result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 10, 33);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 5, 4, 3, 5, 2, 3 }));

    // the range ends with a separator, so the last word is empty
    input = "hi hello there you where me ";
    result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 10, input.Length);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 5, 4, 3, 5, 2, 0 }));

    // empty input
    input = string.Empty;
    result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 0, 0);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 1, 0 }));

    // Two leading separators: the input must be 5 bytes long to match the end offset of 5
    // and must start with two separators to produce two empty words before "abc".
    input = "  abc";
    result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 0, 5);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 3, 0, 0, 3 }));

    // Same input restricted to the two separators only: three empty words.
    input = "  abc";
    result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 0, 2);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 3, 0, 0, 0 }));

    // A single leading separator: the range [0, 2) covers " a".
    input = " abc ";
    result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 0, 2);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 2, 0, 1 }));

    // separator longer than the input
    helper.SetKeyFieldSeparator("abcd");
    input = "abc";
    result = helper.GetWordLengths(Sharpen.Runtime.GetBytesForString(input), 0, 3);
    NUnit.Framework.Assert.IsTrue(Equals(result, new int[] { 1, 3 }));
}
/// <summary>
/// Applies the first key specification configured on <paramref name="helper"/> to
/// <paramref name="input"/> and checks the bytes it selects.
/// </summary>
/// <param name="input">Text whose bytes are handed to the helper.</param>
/// <param name="expectedOutput">
/// Text the selected byte range must decode to, or <c>null</c> when the start offset is
/// expected to be -1.
/// </param>
/// <param name="helper">Helper with at least one key specification set.</param>
/// <param name="s1">Start offset passed to the helper.</param>
/// <param name="e1">End offset passed to the helper; -1 means the byte length of the input.</param>
private void TestKeySpecs(string input, string expectedOutput, KeyFieldHelper helper, int s1, int e1)
{
    Log.Info("input : " + input);
    string keySpecs = helper.KeySpecs()[0].ToString();
    Log.Info("keyspecs : " + keySpecs);
    byte[] inputBytes = Sharpen.Runtime.GetBytesForString(input);

    // get the input bytes
    if (e1 == -1)
    {
        e1 = inputBytes.Length;
    }
    Log.Info("length : " + e1);

    // get the word lengths
    int[] indices = helper.GetWordLengths(inputBytes, s1, e1);

    // get the start index
    int start = helper.GetStartOffset(inputBytes, s1, e1, indices, helper.KeySpecs()[0]);
    Log.Info("start : " + start);
    if (expectedOutput == null)
    {
        // NUnit takes (expected, actual, message); the message must come last.
        NUnit.Framework.Assert.AreEqual(-1, start, "Expected -1 when the start index is invalid");
        return;
    }

    // get the end index
    int end = helper.GetEndOffset(inputBytes, s1, e1, indices, helper.KeySpecs()[0]);
    Log.Info("end : " + end);

    // Clamp the end offset to the last valid index so the copy below stays inside the input.
    end = (end >= inputBytes.Length) ? inputBytes.Length - 1 : end;

    // The end offset is inclusive, hence the + 1.
    int length = end + 1 - start;
    Log.Info("length : " + length);
    byte[] outputBytes = new byte[length];
    System.Array.Copy(inputBytes, start, outputBytes, 0, length);
    string output = Sharpen.Runtime.GetStringForBytes(outputBytes);
    Log.Info("output : " + output);
    Log.Info("expected-output : " + expectedOutput);

    // Message last: with the message first, NUnit would compare the message text against
    // expectedOutput instead of comparing expectedOutput against output.
    NUnit.Framework.Assert.AreEqual(expectedOutput, output, keySpecs + " failed on input '" + input + "'");
}
/// <summary>
/// Picks the partition for <paramref name="key"/> from a hash over the key fields selected
/// by the configured key specifications.
/// </summary>
/// <param name="key">Key to partition; its <c>ToString()</c> form is what gets hashed.</param>
/// <param name="value">Value paired with the key; not used here.</param>
/// <param name="numReduceTasks">Number of reduce tasks, forwarded to the int-based overload.</param>
/// <returns>
/// 0 for a key whose UTF-8 form is empty; otherwise the result of the int-based
/// <c>GetPartition</c> overload applied to the computed hash.
/// </returns>
public override int GetPartition(K2 key, V2 value, int numReduceTasks)
{
    IList<KeyFieldHelper.KeyDescription> specs = keyFieldHelper.KeySpecs();
    string keyText = key.ToString();

    if (specs.Count == 0)
    {
        // No key fields configured: hash the whole key text.
        // NOTE(review): string.GetHashCode() is not guaranteed to be stable across
        // processes on every .NET runtime - confirm that is acceptable here.
        return GetPartition(keyText.GetHashCode(), numReduceTasks);
    }

    byte[] utf8Key;
    try
    {
        utf8Key = Sharpen.Runtime.GetBytesForString(keyText, "UTF-8");
    }
    catch (UnsupportedEncodingException e)
    {
        throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
    }

    // return 0 if the key is empty
    if (utf8Key.Length == 0)
    {
        return 0;
    }

    int[] wordLengths = keyFieldHelper.GetWordLengths(utf8Key, 0, utf8Key.Length);
    int hash = 0;
    foreach (KeyFieldHelper.KeyDescription spec in specs)
    {
        int fieldStart = keyFieldHelper.GetStartOffset(utf8Key, 0, utf8Key.Length, wordLengths, spec);

        // A negative start means this key has no such field; leave the hash untouched.
        if (fieldStart >= 0)
        {
            int fieldEnd = keyFieldHelper.GetEndOffset(utf8Key, 0, utf8Key.Length, wordLengths, spec);
            hash = HashCode(utf8Key, fieldStart, fieldEnd, hash);
        }
    }

    return GetPartition(hash, numReduceTasks);
}