/// <summary>
/// Compares two serialized keys, either as whole byte ranges or field by field
/// according to the key specifications held by <c>keyFieldHelper</c>.
/// </summary>
/// <param name="b1">Buffer containing the first key.</param>
/// <param name="s1">Start index of the first key in <paramref name="b1"/>.</param>
/// <param name="l1">Length of the first key.</param>
/// <param name="b2">Buffer containing the second key.</param>
/// <param name="s2">Start index of the second key in <paramref name="b2"/>.</param>
/// <param name="l2">Length of the second key.</param>
/// <returns>
/// The result of <c>CompareBytes</c> when no key specification is configured;
/// otherwise the first non-zero <c>CompareByteSequence</c> result, or 0 when
/// every key specification compares equal.
/// </returns>
public override int Compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2)
        {
            // Number of leading bytes to skip in each key, as reported by
            // WritableUtils.DecodeVIntSize for the key's first byte.
            int headerFirst = WritableUtils.DecodeVIntSize(b1[s1]);
            int headerSecond = WritableUtils.DecodeVIntSize(b2[s2]);
            IList<KeyFieldHelper.KeyDescription> specs = keyFieldHelper.KeySpecs();

            // Bounds of the payload that follows the skipped prefix.
            int fromFirst = s1 + headerFirst;
            int toFirst = s1 + l1;
            int fromSecond = s2 + headerSecond;
            int toSecond = s2 + l2;

            if (specs.Count == 0)
            {
                // Nothing to select on: compare the complete payloads.
                return CompareBytes(b1, fromFirst, l1 - headerFirst, b2, fromSecond, l2 - headerSecond);
            }

            int[] wordsFirst = keyFieldHelper.GetWordLengths(b1, fromFirst, toFirst);
            int[] wordsSecond = keyFieldHelper.GetWordLengths(b2, fromSecond, toSecond);

            foreach (KeyFieldHelper.KeyDescription spec in specs)
            {
                int beginFirst = keyFieldHelper.GetStartOffset(b1, fromFirst, toFirst, wordsFirst, spec);
                int finishFirst = keyFieldHelper.GetEndOffset(b1, fromFirst, toFirst, wordsFirst, spec);
                int beginSecond = keyFieldHelper.GetStartOffset(b2, fromSecond, toSecond, wordsSecond, spec);
                int finishSecond = keyFieldHelper.GetEndOffset(b2, fromSecond, toSecond, wordsSecond, spec);

                int outcome = CompareByteSequence(b1, beginFirst, finishFirst, b2, beginSecond, finishSecond, spec);
                if (outcome != 0)
                {
                    // The first key specification that differs decides the ordering.
                    return outcome;
                }
            }

            return 0;
        }
예제 #2
0
        /// <summary>
        /// Exercises <c>KeyFieldHelper.GetWordLengths</c> with different separators,
        /// byte ranges and inputs, checking the returned array each time.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestGetWordLengths()
        {
            KeyFieldHelper fieldHelper = new KeyFieldHelper();
            fieldHelper.SetKeyFieldSeparator("\t");

            string text;
            byte[] raw;
            int[] lengths;

            // No key specification has been set yet.
            text = "hi";
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 0, 2);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 1 }));

            // From here on a key specification is configured.
            fieldHelper.SetKeyFieldSpec(1, 2);

            // Tab-separated input.
            text = "hi\thello there";
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 0, text.Length);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 2, 2, 11 }));

            // Switch to a space separator; the tab is now ordinary content.
            fieldHelper.SetKeyFieldSeparator(" ");
            text = "hi hello\tthere you";
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 0, text.Length);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 3, 2, 11, 3 }));

            // Non-zero start index with an explicit end index.
            text = "hi hello there you where me there";
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 10, 33);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 5, 4, 3, 5, 2, 3 }));

            // Non-zero start index on input that ends with the separator.
            text = "hi hello there you where me ";
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 10, text.Length);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 5, 4, 3, 5, 2, 0 }));

            // Empty input.
            text = string.Empty;
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 0, 0);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 1, 0 }));

            // Input that begins with two separators, scanned in full.
            text = "  abc";
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 0, 5);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 3, 0, 0, 3 }));

            // Same input, but the scan stops at index 2.
            text = "  abc";
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 0, 2);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 3, 0, 0, 0 }));

            // Separator at both ends, scan stops at index 2.
            text = " abc ";
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 0, 2);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 2, 0, 1 }));

            // A separator string that is longer than the input.
            fieldHelper.SetKeyFieldSeparator("abcd");
            text = "abc";
            raw = Sharpen.Runtime.GetBytesForString(text);
            lengths = fieldHelper.GetWordLengths(raw, 0, 3);
            NUnit.Framework.Assert.IsTrue(Equals(lengths, new int[] { 1, 3 }));
        }
예제 #3
0
        /// <summary>
        /// Extracts the bytes selected by the helper's first key specification from
        /// <paramref name="input"/> and asserts that they match
        /// <paramref name="expectedOutput"/>.
        /// </summary>
        /// <param name="input">Text whose bytes are handed to the helper.</param>
        /// <param name="expectedOutput">
        /// Expected extracted text, or <c>null</c> when the start offset is expected to be -1.
        /// </param>
        /// <param name="helper">Helper holding at least one key specification.</param>
        /// <param name="s1">Start index passed to the helper.</param>
        /// <param name="e1">
        /// End index passed to the helper; -1 means "use the length of the input bytes".
        /// </param>
        private void TestKeySpecs(string input, string expectedOutput, KeyFieldHelper helper
                                  , int s1, int e1)
        {
            Log.Info("input : " + input);
            // Only the first key specification is exercised; look it up once.
            KeyFieldHelper.KeyDescription keySpec = helper.KeySpecs()[0];
            string keySpecs = keySpec.ToString();

            Log.Info("keyspecs : " + keySpecs);
            // get the input bytes
            byte[] inputBytes = Sharpen.Runtime.GetBytesForString(input);
            if (e1 == -1)
            {
                e1 = inputBytes.Length;
            }
            Log.Info("length : " + e1);
            // get the word lengths
            int[] indices = helper.GetWordLengths(inputBytes, s1, e1);
            // get the start index
            int start = helper.GetStartOffset(inputBytes, s1, e1, indices, keySpec);

            Log.Info("start : " + start);
            if (expectedOutput == null)
            {
                // NUnit's argument order is (expected, actual, message).
                NUnit.Framework.Assert.AreEqual(-1, start, "Expected -1 when the start index is invalid");
                return;
            }
            // get the end index
            int end = helper.GetEndOffset(inputBytes, s1, e1, indices, keySpec);

            Log.Info("end : " + end);
            // Clamp the end offset to the last valid index so the copy below
            // cannot read past the end of the input bytes.
            end = (end >= inputBytes.Length) ? inputBytes.Length - 1 : end;
            // end is treated as inclusive, hence the + 1.
            int length = end + 1 - start;

            Log.Info("length : " + length);
            byte[] outputBytes = new byte[length];
            System.Array.Copy(inputBytes, start, outputBytes, 0, length);
            string output = Sharpen.Runtime.GetStringForBytes(outputBytes);

            Log.Info("output : " + output);
            Log.Info("expected-output : " + expectedOutput);
            // NUnit's argument order is (expected, actual, message).
            NUnit.Framework.Assert.AreEqual(expectedOutput, output
                                            , keySpecs + " failed on input '" + input + "'");
        }
        /// <summary>
        /// Chooses a partition for <paramref name="key"/> by hashing either the whole
        /// key string or only the byte ranges selected by the configured key
        /// specifications.
        /// </summary>
        /// <param name="key">Key whose string form is hashed.</param>
        /// <param name="value">Value belonging to the key; not used here.</param>
        /// <param name="numReduceTasks">Forwarded to the two-argument <c>GetPartition</c> overload.</param>
        /// <returns>
        /// 0 when key specifications exist and the key encodes to zero bytes; otherwise the
        /// result of the two-argument <c>GetPartition</c> overload.
        /// </returns>
        public override int GetPartition(K2 key, V2 value, int numReduceTasks)
        {
            IList<KeyFieldHelper.KeyDescription> specs = keyFieldHelper.KeySpecs();

            if (specs.Count == 0)
            {
                // No key specification: hash the complete key string.
                // NOTE(review): string.GetHashCode() is not guaranteed to be stable across
                // processes on every .NET runtime - confirm this is acceptable here.
                return GetPartition(key.ToString().GetHashCode(), numReduceTasks);
            }

            byte[] encodedKey;
            try
            {
                encodedKey = Sharpen.Runtime.GetBytesForString(key.ToString(), "UTF-8");
            }
            catch (UnsupportedEncodingException e)
            {
                throw new RuntimeException("The current system does not " + "support UTF-8 encoding!"
                                           , e);
            }

            // An empty key always goes to partition 0.
            if (encodedKey.Length == 0)
            {
                return 0;
            }

            int total = encodedKey.Length;
            int[] wordLengths = keyFieldHelper.GetWordLengths(encodedKey, 0, total);
            int hash = 0;

            foreach (KeyFieldHelper.KeyDescription spec in specs)
            {
                int begin = keyFieldHelper.GetStartOffset(encodedKey, 0, total, wordLengths, spec);

                // A negative start offset means this specification matched nothing,
                // so it contributes nothing to the hash.
                if (begin >= 0)
                {
                    int finish = keyFieldHelper.GetEndOffset(encodedKey, 0, total, wordLengths, spec);
                    hash = HashCode(encodedKey, begin, finish, hash);
                }
            }

            return GetPartition(hash, numReduceTasks);
        }