KinesisRecord make_kinesis_record(int min_serialized_size = 0)
{
    var kr = new KinesisRecord();
    int num_user_records = 1 + (new Random().Next() % 100);
    Func<UserRecord> make_ur = () =>
    {
        return TestUtil.make_user_record(
            new Random().Next().ToString(),
            new Random().Next().ToString(),
            new Random().Next().ToString(),
            10000,
            "myStream");
    };

    for (int i = 0; i < num_user_records; i++)
    {
        kr.add(make_ur());
    }

    if (min_serialized_size > 0)
    {
        while ((int)kr.accurate_size() < min_serialized_size)
        {
            kr.add(make_ur());
        }
    }

    return kr;
}

public static void verify(List<UserRecord> original, KinesisRecord kr)
{
    AggregatedRecord ar = null;
    verify_format(original, kr, ref ar);
    verify_content(original, ar);
}

public void KinesisRecordUnitTest_Clearing()
{
    int N = 10;
    List<UserRecord> user_records = new List<UserRecord>();
    KinesisRecord r = new KinesisRecord();
    for (int i = 0; i < N; i++)
    {
        var ur = TestUtil.make_user_record();
        user_records.Add(ur);
        r.add(ur);
    }
    TestUtil.verify(user_records, r);

    for (int i = 0; i < 5; i++)
    {
        r.clear();
        user_records.Clear();
        for (int j = 0; j < N; j++)
        {
            var ur = TestUtil.make_user_record();
            user_records.Add(ur);
            r.add(ur);
        }
        TestUtil.verify(user_records, r);
    }
}

public void KinesisRecordUnitTest_AccurateSize()
{
    for (int i = 0; i < 100; i++)
    {
        KinesisRecord r = new KinesisRecord();
        long num_records = new Random().Next(1, 512);
        for (long j = 0; j < num_records; j++)
        {
            int key_size = new Random().Next(1, 256);
            int data_size = new Random().Next(1, 64 * 1024);
            var ur = TestUtil.make_user_record(
                new string('a', key_size),
                new string('a', data_size),
                new Random().Next() % 2 == 0 ? "" : "123");
            r.add(ur);
        }

        int predicted = (int)r.accurate_size();
        string serialized = r.serialize();
        if (r.Items().Count > 1)
        {
            // Aggregated records carry a trailing 16-byte MD5 checksum that
            // accurate_size() does not include.
            Assert.AreEqual(predicted, serialized.Length - 16);
        }
        else
        {
            Assert.AreEqual(predicted, serialized.Length);
        }
    }
}

public void ReducerUnitTest_CountLimit()
{
    int limit = 100;
    var reducer = make_reducer(int.MaxValue, limit);
    List<UserRecord> v = new List<UserRecord>();
    for (int j = 0; j < 3; j++)
    {
        v.Clear();
        for (int i = 0; i < limit; i++)
        {
            var ur = TestUtil.make_user_record();
            v.Add(ur);
            KinesisRecord result = reducer.add(ur) as KinesisRecord;
            if (i < limit - 1)
            {
                Assert.IsNull(result);
            }
            else
            {
                Assert.IsNotNull(result);
                TestUtil.verify(v, result);
                Assert.AreEqual((int)reducer.size(), 0);
            }
        }
    }
}

KinesisRecord make_kinesis_record(DateTime deadline, DateTime expiration)
{
    var ur = TestUtil.make_user_record();
    ur.set_deadline(deadline);
    ur.set_expiration(expiration);
    ur.Predicted_shard(0);
    var kr = new KinesisRecord();
    kr.add(ur);
    return kr;
}

public void KinesisRecordUnitTest_EstimatedSize()
{
    for (int i = 0; i < 50; i++)
    {
        KinesisRecord r = new KinesisRecord();
        int num_records = new Random().Next(2, 512);

        // Non-repeated partition keys
        for (int j = 0; j < num_records; j++)
        {
            int key_size = new Random().Next(1, 256);
            int data_size = new Random().Next(1, 64 * 1024);
            var ur = TestUtil.make_user_record(
                new string('a', key_size),
                new string('a', data_size),
                new Random().Next() % 2 == 0 ? "" : "123");
            r.add(ur);
        }

        // Repeated partition keys
        int key_size1 = new Random().Next(1, 256);
        for (long j = 0; j < num_records; j++)
        {
            int data_size = new Random().Next(1, 64 * 1024);
            var ur = TestUtil.make_user_record(
                new string('a', key_size1),
                new string('a', data_size),
                new Random().Next() % 2 == 0 ? "" : "123");
            r.add(ur);
        }

        // Small keys, small data
        for (long j = 0; j < num_records; j++)
        {
            var ur = TestUtil.make_user_record(
                new string('a', 2),
                new string('a', 2),
                new Random().Next() % 2 == 0 ? "" : "123");
            r.add(ur);
        }

        int estimated = (int)r.Estimated_size();
        string serialized = r.serialize();
        double diff = (double)serialized.Length - estimated;
        double percentage_diff = diff / serialized.Length * 100;
        percentage_diff *= percentage_diff < 0 ? -1 : 1;

        StringBuilder ss = new StringBuilder();
        ss.Append("Estimated size should be within 1 percent or 32 bytes of actual ")
          .Append("size, estimate was ").Append(estimated).Append(", actual size was ")
          .Append(serialized.Length).Append(" (").Append(percentage_diff).Append("% difference)");
        // MSTest equivalent of the BOOST_CHECK_MESSAGE assertion from the original C++ test.
        Assert.IsTrue(percentage_diff < 1 || diff < 32, ss.ToString());
    }
}

public static void verify_unaggregated(UserRecord ur, KinesisRecord kr)
{
    var serialized = kr.serialize();
    Assert.AreEqual(ur.Data().ToString(Encoding.Default), serialized);
    Assert.AreEqual(ur.Partition_key(), kr.partition_key());
    if (ur.explicit_hash_key().ToString() != "-1")
    {
        Assert.AreEqual(ur.explicit_hash_key().ToString(), kr.explicit_hash_key());
    }
    else
    {
        // -1 means no explicit hash key was set, so the record's hash key should
        // be derived from the partition key.
        Assert.AreEqual(
            KPLNETInterface.Utils.GetDecimalHashKey(ur.Partition_key()).ToString(),
            kr.explicit_hash_key());
    }
}

public void ReducerUnitTest_NonEmpty()
{
    KinesisRecord kr = null;
    var reducer = make_reducer(
        int.MaxValue,
        int.MaxValue,
        (result) => { kr = result; });
    reducer.Flush();
    Thread.Sleep(100);
    Assert.IsNull(kr, "Flush should not have produced a KinesisRecord because the Reducer is empty");
}

public void KinesisRecordUnitTest_Empty()
{
    KinesisRecord r = new KinesisRecord();
    Assert.AreEqual(0, (int)r.accurate_size());
    Assert.AreEqual(0, (int)r.Estimated_size());

    // serialize() on an empty record is expected to throw. A flag is used instead
    // of calling Assert.Fail inside the try block, so the assertion failure itself
    // cannot be swallowed by the catch.
    bool threw = false;
    try
    {
        r.serialize();
    }
    catch (Exception)
    {
        threw = true;
    }
    Assert.IsTrue(threw, "Calling serialize on empty KinesisRecord should cause exception");
}

public void KinesisRecordUnitTest_SingleRecordTestMethod()
{
    // No explicit hash key (ehk)
    {
        KinesisRecord r = new KinesisRecord();
        var ur = TestUtil.make_user_record();
        r.add(ur);
        TestUtil.verify_unaggregated(ur, r);
    }

    // With ehk
    {
        KinesisRecord r = new KinesisRecord();
        var ur = TestUtil.make_user_record("a", "a", "123");
        r.add(ur);
        TestUtil.verify_unaggregated(ur, r);
    }
}

public void ReducerUnitTest_Deadline()
{
    KinesisRecord kr = null;
    var reducer = make_reducer(
        int.MaxValue,
        int.MaxValue,
        (result) => { kr = result; });
    List<UserRecord> v = new List<UserRecord>();
    for (int i = 0; i < 100; i++)
    {
        var ur = TestUtil.make_user_record(
            "pk", new Random().Next().ToString(), "123", 5000 + i);
        v.Add(ur);
        reducer.add(ur);
    }

    // Should not flush after 1 second because the deadlines are set about
    // 5 seconds out.
    Thread.Sleep(1000);
    Assert.AreEqual((int)reducer.size(), 100);

    // Now put a record with a deadline of now to trigger the flush.
    var ur1 = TestUtil.make_user_record(
        "pk", new Random().Next().ToString(), "123", 0);
    reducer.add(ur1);
    // This record should be moved to the front when flushed because of its
    // deadline.
    v.Insert(0, ur1);

    Thread.Sleep(300);
    Assert.AreEqual((int)reducer.size(), 0);
    Assert.IsNotNull(kr);
    TestUtil.verify(v, kr);
}

AwsKinesisResult make_prr_ctx(int num_kr,
                              int num_ur_per_kr,
                              string error,
                              Amazon.Kinesis.Model.PutRecordsResponse outcome,
                              DateTime start,
                              DateTime end)
{
    List<KinesisRecord> krs = new List<KinesisRecord>();
    for (int i = 0; i < num_kr; i++)
    {
        var kr = new KinesisRecord();
        for (int j = 0; j < num_ur_per_kr; j++)
        {
            var ur = TestUtil.make_user_record();
            ur.Predicted_shard(i);
            kr.add(ur);
        }
        krs.Add(kr);
    }

    AwsKinesisResult result = new AwsKinesisResult(
        error,
        new AwsKinesisResponse(outcome),
        new PutRecordsRequest(),
        start,
        end);
    result.context<PutRecordsRequest>().Items().AddRange(krs);
    return result;
}

public static void verify_format(List<UserRecord> original, KinesisRecord kr, ref AggregatedRecord container)
{
    container = null;
    byte[] serialized = kr.SerializedAggregatedRecord;

    // Verify the magic number (the KPL aggregation prefix 0xF3 0x89 0x9A 0xC2,
    // written here as signed bytes).
    byte[] expected_magic = null;
    unchecked
    {
        expected_magic = new byte[] { (byte)-13, (byte)-119, (byte)-102, (byte)-62 };
    }
    int magic_len = expected_magic.Length;
    byte[] magic = serialized.Take(magic_len).ToArray();
    Assert.IsTrue(KPLNETInterface.Utils.AreArrayEqual(expected_magic, magic));

    // Verify the protobuf payload, which sits between the magic prefix and the
    // trailing 16-byte MD5 checksum.
    byte[] payload = serialized.Skip(expected_magic.Length)
                               .Take(serialized.Length - 16 - magic_len)
                               .ToArray();
    container = AggregatedRecord.Parser.ParseFrom(Google.Protobuf.ByteString.CopyFrom(payload));
    Assert.IsNotNull(container);

    // Verify the MD5 checksum.
    Assert.IsTrue(KPLNETInterface.Utils.AreArrayEqual(
        KPLNETInterface.Utils.GetMD5(payload),
        serialized.Skip(serialized.Length - 16).Take(16).ToArray()));

    // Verify the explicit hash key set on the Kinesis record.
    List<string> acceptable_hash_keys = new List<string>();
    foreach (var ur in original)
    {
        if (ur.explicit_hash_key() > -1)
        {
            acceptable_hash_keys.Add(ur.explicit_hash_key().ToString());
        }
        else
        {
            acceptable_hash_keys.Add(KPLNETInterface.Utils.GetDecimalHashKey(ur.Partition_key()).ToString());
        }
    }
    Assert.IsTrue(acceptable_hash_keys.Exists((i) => i == kr.explicit_hash_key()));
}

public void KinesisRecordUnitTest_SameEHK()
{
    int N = 1000;
    int M = 100;
    List<UserRecord> user_records = new List<UserRecord>();
    KinesisRecord r = new KinesisRecord();
    for (int i = 0; i < N; i++)
    {
        var ur = TestUtil.make_user_record("pk", "data", "123");
        user_records.Add(ur);
        r.add(ur);
    }
    for (int i = 0; i < M; i++)
    {
        r.remove_last();
        user_records.RemoveAt(user_records.Count - 1);
    }
    TestUtil.verify(user_records, r);
}

public void KinesisRecordUnitTest_Deadlines()
{
    // The nearest deadline should always be kept
    {
        KinesisRecord r = new KinesisRecord();
        var start = DateTime.Now;
        for (int i = 0; i < 10; i++)
        {
            var ur = TestUtil.make_user_record();
            ur.set_deadline(start.AddMilliseconds(i * 100));
            ur.set_expiration(start.AddMilliseconds(i * 100));
            r.add(ur);
        }
        Assert.IsTrue(r.Deadline() == start);
        Assert.IsTrue(r.Expiration() == start);
    }

    // If a nearer deadline comes in, it should override the previous
    {
        KinesisRecord r = new KinesisRecord();
        var earlier = DateTime.Now;
        var later = earlier.AddMilliseconds(500);
        {
            var ur = TestUtil.make_user_record();
            ur.set_deadline(later);
            r.add(ur);
            Assert.IsTrue(r.Deadline() == later);
        }
        {
            var ur = TestUtil.make_user_record();
            ur.set_deadline(earlier);
            r.add(ur);
            Assert.IsTrue(r.Deadline() == earlier);
        }

        // Removing the last added record should restore the previous deadline
        r.remove_last();
        Assert.IsTrue(r.Deadline() == later);
    }
}

public void KinesisRecordUnitTest_DifferentPartitionKey()
{
    int N = 1000;
    int M = 100;
    List<UserRecord> user_records = new List<UserRecord>();
    KinesisRecord r = new KinesisRecord();
    for (int i = 0; i < N; i++)
    {
        var ur = TestUtil.make_user_record(i.ToString());
        user_records.Add(ur);
        r.add(ur);
    }
    for (int i = 0; i < M; i++)
    {
        r.remove_last();
        user_records.RemoveAt(user_records.Count - 1);
    }
    TestUtil.verify(user_records, r);
}

public void AggregatorUnitTest_BasicTestMethod()
{
    var aggregator = make_aggregator();
    for (int shard_id = 1; shard_id <= 3; shard_id++)
    {
        List<UserRecord> v = new List<UserRecord>();
        KinesisRecord kr = null;
        for (int i = 0; i < kCountLimit; i++)
        {
            var ur = TestUtil.make_user_record(
                "pk",
                TestUtil.random_string(new Random().Next(100)),
                TestUtil.get_hash_key(shard_id).ToString(),
                10000 + i,
                "MyStream",
                0);
            kr = aggregator.put(ur);
            v.Add(ur);
            Assert.IsFalse(ur.Predicted_shard() == -1);
            Assert.AreEqual(ur.Predicted_shard(), shard_id);
        }
        Assert.IsNotNull(kr);
        TestUtil.verify(v, kr);
    }
}

public void KinesisRecordUnitTest_MixedPartitionKey()
{
    List<string> keys = new List<string>()
    {
        "a", "b", "b", "a", "a", "c", "b", "a", "d", "e", "e", "d", "d", "d"
    };
    List<UserRecord> user_records = new List<UserRecord>();
    KinesisRecord r = new KinesisRecord();
    for (int i = 0; i < keys.Count; i++)
    {
        var ur = TestUtil.make_user_record(keys[i]);
        user_records.Add(ur);
        r.add(ur);
    }
    for (int i = 0; i < 3; i++)
    {
        r.remove_last();
        user_records.RemoveAt(user_records.Count - 1);
    }
    TestUtil.verify(user_records, r);
}

public void KinesisRecordUnitTest_MixedEHK()
{
    List<string> keys = new List<string>()
    {
        "1", "2", "2", "1", "1", "3", "2", "1", "4", "5", "5", "4", "4", "4"
    };
    List<UserRecord> user_records = new List<UserRecord>();
    KinesisRecord r = new KinesisRecord();
    for (int i = 0; i < keys.Count; i++)
    {
        var ur = TestUtil.make_user_record("pk", "data", keys[i]);
        user_records.Add(ur);
        r.add(ur);
    }
    for (int i = 0; i < 3; i++)
    {
        r.remove_last();
        user_records.RemoveAt(user_records.Count - 1);
    }
    TestUtil.verify(user_records, r);
}

public void ReducerUnitTest_ResetTimeout()
{
    KinesisRecord kr = null;
    var reducer = make_reducer(
        300,
        int.MaxValue,
        (result) => { kr = result; });
    List<UserRecord> v = new List<UserRecord>();
    int k = 0;
    for (int i = 0; i < 10; i++)
    {
        var ur = TestUtil.make_user_record(
            "pk", new Random().Next().ToString(), "123", 2000 + k++);
        v.Add(ur);
        reducer.add(ur);
    }

    // This record has a much closer deadline
    {
        var ur = TestUtil.make_user_record(
            "pk", new Random().Next().ToString(), "123", 100);
        v.Insert(0, ur);
        reducer.add(ur);
    }

    // Put records until flush happens
    KinesisRecord kr2;
    {
        do
        {
            // The other records have a further deadline.
            var ur = TestUtil.make_user_record(
                "pk", new Random().Next().ToString(), "123", 2000 + k++);
            v.Add(ur);
            kr2 = reducer.add(ur) as KinesisRecord;
        } while (kr2 == null);
    }

    // Put a few more to make sure the buffer is not empty
    for (int i = 0; i < 10; i++)
    {
        var ur = TestUtil.make_user_record(
            "pk", new Random().Next().ToString(), "123", 2000 + k++);
        v.Add(ur);
        reducer.add(ur);
    }

    // No flush should happen in the next second even though we had a record with
    // a 100ms deadline - that record should've gotten flushed by the size limit,
    // and its deadline should no longer apply.
    Thread.Sleep(1000);
    Assert.IsNull(kr);

    // The timer should now be set to the min deadline of the remaining records.
    // It should go off after another second or so
    Thread.Sleep(1500);
    Assert.IsNotNull(kr);

    // Check data integrity
    Assert.AreEqual(kr.Size() + kr2.Size() + reducer.size(), (ulong)v.Count);
    for (int i = v.Count - 1; i >= (int)kr.Size() + (int)kr2.Size(); i--)
    {
        v.RemoveAt(i);
    }
    List<UserRecord> v2 = new List<UserRecord>();
    for (int i = (int)kr2.Size() - 1; i >= 0; i--)
    {
        v2.Insert(0, v[i]);
        v.RemoveAt(i);
    }
    TestUtil.verify(v, kr);
    TestUtil.verify(v2, kr2);
}

public void ReducerUnitTest_Concurrency()
{
    int counter = 0;
    //LOG(info) << "Starting concurrency test. If this doesn't finish in 30 " << "seconds or so it probably means there's a deadlock.";
    ConcurrentBag<UserRecord> put = new ConcurrentBag<UserRecord>();
    ConcurrentBag<KinesisRecord> results = new ConcurrentBag<KinesisRecord>();
    var reducer = make_reducer(5000, int.MaxValue, (result) => { results.Add(result); });

    List<Thread> threads = new List<Thread>();
    for (int i = 0; i < 16; i++)
    {
        // Copy the loop variable so each thread sees its own index; capturing i
        // directly would share a single variable across all 16 threads.
        int thread_id = i;
        var t = new Thread(() =>
        {
            for (int j = 0; j < 32; j++)
            {
                KinesisRecord kr = null;
                do
                {
                    int count = Interlocked.Increment(ref counter);
                    var ur = TestUtil.make_user_record("pk", count.ToString(), "123", count);
                    put.Add(ur);
                    kr = reducer.add(ur) as KinesisRecord;
                } while (kr == null);
                results.Add(kr);

                // Call flush sometimes too to mix things up further
                if (thread_id % 8 == 0 && thread_id == j)
                {
                    reducer.Flush();
                }
            }
        });
        t.Start();
        threads.Add(t);
    }

    foreach (var t in threads)
    {
        t.Join();
    }

    while (reducer.size() > 0)
    {
        reducer.Flush();
        Thread.Sleep(1000);
    }
    Assert.AreEqual((int)reducer.size(), 0);

    //LOG(info) << "Finished putting data, " << counter << " records put. "
    //          << "Analyzing results...";

    // Check that all records made it out. Order is no longer guaranteed with
    // many workers, but we've put a monotonically increasing number in the
    // record data, so that will allow us to sort and match records up.
    List<UserRecord> put_v = new List<UserRecord>(put.ToArray());
    List<UserRecord> result_v = new List<UserRecord>();
    var resultsArray = results.ToArray();
    for (int i = 0; i < results.Count; i++)
    {
        result_v.AddRange(resultsArray[i].Items());
    }

    Comparison<UserRecord> ckr = (a, b) =>
    {
        return string.Compare(a.Data().ToStringUtf8(), b.Data().ToStringUtf8());
    };
    put_v.Sort(ckr);
    result_v.Sort(ckr);

    Assert.AreEqual(put_v.Count, result_v.Count);
    for (int i = 0; i < put_v.Count; i++)
    {
        Assert.AreEqual(put_v[i], result_v[i]);
    }
}