// Test fixture setup: runs a word-count over a temp file and projects the
// per-word counts to doubles for the tests that need a numeric RDD.
public static void Initialize(TestContext context)
{
    var sparkContext = new SparkContext(null);
    var lines = sparkContext.TextFile(Path.GetTempFileName());
    var words = lines.FlatMap(line => line.Split(' '));
    var counts = words
        .Map(word => new KeyValuePair<string, int>(word, 1))
        .ReduceByKey((a, b) => a + b);
    doubles = counts.Map(pair => (double)pair.Value);
}
// Test fixture setup: builds the (word, 1) pair RDD used by the tests.
public static void Initialize()
{
    var sparkContext = new SparkContext(null);
    var lines = sparkContext.TextFile(Path.GetTempFileName());
    var words = lines.FlatMap(line => line.Split(' '));
    pairs = words.Map(word => new KeyValuePair<string, int>(word, 1));
}
// Estimates Pi by mapping every sample through a serializable helper class
// and summing the hits that fall inside the unit circle.
private static void CalculatePiUsingSerializedClassApproach(int n, RDD<int> rdd)
{
    var helper = new PiHelper();
    var hits = rdd.Map(helper.Execute).Reduce((a, b) => a + b);
    var pi = 4.0 * hits / n;
    Logger.LogInfo(string.Format("(serialized class approach) Pi is roughly {0}.", pi));
}
// Estimates Pi via Monte Carlo sampling with an anonymous method:
// each of the n samples lands in [-1, 1]^2 and counts as a hit when it
// falls inside the unit circle; Pi ~= 4 * hits / n.
private static void CalculatePiUsingAnonymousMethod(int n, RDD<int> rdd)
{
    var count = rdd
        .Map(i =>
        {
            // BUG FIX: the parameterless Random() is seeded from the system
            // clock, so many map invocations landing in the same clock tick
            // previously shared a seed and generated duplicated (correlated)
            // samples, biasing the estimate. Seed from a fresh GUID so each
            // sample stream is independent.
            var random = new Random(Guid.NewGuid().GetHashCode());
            var x = random.NextDouble() * 2 - 1;
            var y = random.NextDouble() * 2 - 1;
            return (x * x + y * y) < 1 ? 1 : 0;
        })
        .Reduce((x, y) => x + y);
    Logger.LogInfo(string.Format("(anonymous method approach) Pi is roughly {0}.", 4.0 * count / n));
}
// Verifies GetDefaultPartitionNum on an RDD derived from a text file that
// was requested with 5 partitions (the null-proxy SparkContext reports 2).
public void TestGetDefaultPartitionNum()
{
    var sc = new SparkContext(null);
    var lines = sc.TextFile(Path.GetTempFileName(), 5);
    words = lines.FlatMap(line => line.Split(' '));

    var defaultNumPartitions = words.GetDefaultPartitionNum();

    Assert.AreEqual(2, defaultNumPartitions);
}
// Services callback requests from the JVM side on a dedicated thread.
// Wire protocol per request (exact read order matters): command string;
// for "callback": RDD count, that many JVM object ids, batch time, a
// binary-serialized delegate, a serialization mode string (plus a second
// mode string for the two-RDD delegate shape). Replies 'j' + jrdd id when
// the delegate yields an RDD, 'n' otherwise. Exits the process on an
// unexpected failure unless the callback socket was shut down explicitly.
private void ProcessCallbackRequest(object socket)
{
    logger.LogDebug("New thread (id={0}) created to process callback request", Thread.CurrentThread.ManagedThreadId);
    try
    {
        using (Socket sock = (Socket)socket)
        using (var s = new NetworkStream(sock))
        {
            while (true)
            {
                try
                {
                    string cmd = SerDe.ReadString(s);
                    if (cmd == "close")
                    {
                        logger.LogDebug("receive close cmd from Scala side");
                        break;
                    }
                    else if (cmd == "callback")
                    {
                        int numRDDs = SerDe.ReadInt(s);
                        var jrdds = new List<JvmObjectReference>();
                        for (int i = 0; i < numRDDs; i++)
                        {
                            jrdds.Add(new JvmObjectReference(SerDe.ReadObjectId(s)));
                        }
                        double time = SerDe.ReadDouble(s);

                        // SECURITY NOTE(review): BinaryFormatter deserialization of
                        // socket data is unsafe for untrusted peers; here the peer is
                        // the co-located JVM bridge. Consider a safer serializer.
                        IFormatter formatter = new BinaryFormatter();
                        object func = formatter.Deserialize(new MemoryStream(SerDe.ReadBytes(s)));
                        string serializedMode = SerDe.ReadString(s);
                        RDD<dynamic> rdd = null;
                        // a null id means the JVM side sent no RDD for this slot
                        if (jrdds[0].Id != null)
                            rdd = new RDD<dynamic>(new RDDIpcProxy(jrdds[0]), sparkContext, (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode));

                        if (func is Func<double, RDD<dynamic>, RDD<dynamic>>)
                        {
                            JvmObjectReference jrdd = ((((Func<double, RDD<dynamic>, RDD<dynamic>>)func)(time, rdd)).RddProxy as RDDIpcProxy).JvmRddReference;
                            SerDe.Write(s, (byte)'j');
                            SerDe.Write(s, jrdd.Id);
                        }
                        else if (func is Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>>)
                        {
                            string serializedMode2 = SerDe.ReadString(s);
                            RDD<dynamic> rdd2 = new RDD<dynamic>(new RDDIpcProxy(jrdds[1]), sparkContext, (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode2));
                            JvmObjectReference jrdd = ((((Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>>)func)(time, rdd, rdd2)).RddProxy as RDDIpcProxy).JvmRddReference;
                            SerDe.Write(s, (byte)'j');
                            SerDe.Write(s, jrdd.Id);
                        }
                        else
                        {
                            ((Action<double, RDD<dynamic>>)func)(time, rdd);
                            SerDe.Write(s, (byte)'n');
                        }
                    }
                }
                catch (Exception e)
                {
                    // log exception only when callback socket is not shutdown explicitly
                    if (!callbackSocketShutdown)
                    {
                        // BUG FIX: this log message previously contained a raw line
                        // break inside the string literal, which is not valid C#.
                        logger.LogError("Exception processing call back request. Thread id {0}", Thread.CurrentThread.ManagedThreadId);
                        logger.LogException(e);

                        // exit when exception happens
                        logger.LogError("ProcessCallbackRequest fail, will exit ...");
                        Thread.Sleep(1000);
                        System.Environment.Exit(1);
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        logger.LogError("Exception in callback. Thread id {0}", Thread.CurrentThread.ManagedThreadId);
        logger.LogException(e);
    }
    logger.LogDebug("Thread (id={0}) to process callback request exiting", Thread.CurrentThread.ManagedThreadId);
}
// Verifies RDD.Coalesce delegates to the proxy and wraps the returned proxy.
public void TestCoalesce()
{
    const int numPartitions = 4;
    const bool shuffle = true;

    var baseProxy = new Mock<IRDDProxy>();
    var resultProxy = new Mock<IRDDProxy>();
    baseProxy.Setup(m => m.Coalesce(It.IsAny<int>(), It.IsAny<bool>())).Returns(resultProxy.Object);

    var rdd = new RDD<string> { rddProxy = baseProxy.Object };
    var coalesced = rdd.Coalesce(numPartitions, shuffle);

    Assert.IsNotNull(coalesced);
    Assert.AreEqual(resultProxy.Object, coalesced.RddProxy);
}
// Verifies RandomSampleWithRange delegates to the proxy and wraps the result.
public void TestRandomSampleWithRange()
{
    var baseProxy = new Mock<IRDDProxy>();
    var resultProxy = new Mock<IRDDProxy>();
    baseProxy
        .Setup(m => m.RandomSampleWithRange(It.IsAny<double>(), It.IsAny<double>(), It.IsAny<long>()))
        .Returns(resultProxy.Object);

    var rdd = new RDD<string> { rddProxy = baseProxy.Object };
    var sampled = rdd.RandomSampleWithRange(0.1, 0.8, new Random().Next());

    Assert.IsNotNull(sampled);
    Assert.AreEqual(resultProxy.Object, sampled.RddProxy);
}
// Verifies RandomSplit returns one RDD per proxy produced by the split.
public void TestRandomSplit()
{
    var baseProxy = new Mock<IRDDProxy>();
    baseProxy
        .Setup(m => m.RandomSplit(It.IsAny<double[]>(), It.IsAny<long>()))
        .Returns(new[] { new Mock<IRDDProxy>().Object, new Mock<IRDDProxy>().Object });

    var rdd = new RDD<string> { rddProxy = baseProxy.Object };
    var splits = rdd.RandomSplit(new double[] { 2, 3 }, 17);

    Assert.IsNotNull(splits);
    Assert.AreEqual(2, splits.Length);
}
// Verifies ToDebugString surfaces the proxy's debug string unchanged.
public void TestToDebugString()
{
    const string expectedDebugStr = "Debug String";
    var baseProxy = new Mock<IRDDProxy>();
    baseProxy.Setup(m => m.ToDebugString()).Returns(expectedDebugStr);

    var rdd = new RDD<string> { rddProxy = baseProxy.Object };
    var debugStr = rdd.ToDebugString();

    Assert.IsNotNull(debugStr);
    Assert.AreEqual(expectedDebugStr, debugStr);
}
// Exercises MapWithStateHelper.Execute both without and with an initial-state RDD.
public void TestDStreamMapWithStateMapWithStateHelper()
{
    // Case 1: initialStateRdd is null.
    var spec = new StateSpec<string, int, int, int>((k, v, s) => v)
        .NumPartitions(2)
        .Timeout(TimeSpan.FromSeconds(100));
    var helper = new MapWithStateHelper<string, int, int, int>((t, rdd) => rdd, spec);

    var sparkContextProxy = new Mock<ISparkContextProxy>();
    var sc = new SparkContext(sparkContextProxy.Object, null);

    var pairwiseRddProxy = new Mock<IRDDProxy>();
    sparkContextProxy
        .Setup(p => p.CreatePairwiseRDD(It.IsAny<IRDDProxy>(), It.IsAny<int>(), It.IsAny<long>()))
        .Returns(pairwiseRddProxy.Object);

    var pipelinedRddProxy = new Mock<IRDDProxy>();
    pipelinedRddProxy.Setup(p => p.Union(It.IsAny<IRDDProxy>())).Returns(new Mock<IRDDProxy>().Object);
    sparkContextProxy
        .Setup(p => p.CreateCSharpRdd(It.IsAny<IRDDProxy>(), It.IsAny<byte[]>(), It.IsAny<Dictionary<string, string>>(),
            It.IsAny<List<string>>(), It.IsAny<bool>(), It.IsAny<List<Broadcast>>(), It.IsAny<List<byte[]>>()))
        .Returns(pipelinedRddProxy.Object);

    var valueRddProxy = new Mock<IRDDProxy>();
    var valuesRdd = new RDD<dynamic>(valueRddProxy.Object, sc);

    var resultRdd = helper.Execute(DateTime.UtcNow.Millisecond, null, valuesRdd);
    Assert.IsNotNull(resultRdd);

    // Case 2: initialStateRdd is provided.
    var initialStateRdd = new RDD<KeyValuePair<string, int>>(new Mock<IRDDProxy>().Object, null);
    var spec2 = new StateSpec<string, int, int, int>((k, v, s) => v)
        .InitialState(initialStateRdd)
        .NumPartitions(2);
    var helper2 = new MapWithStateHelper<string, int, int, int>((t, rdd) => rdd, spec2);

    var resultRdd2 = helper2.Execute(DateTime.UtcNow.Millisecond, null, valuesRdd);
    Assert.IsNotNull(resultRdd2);
}
/// <summary>
/// Creates a <see cref="DataFrame"/> from a RDD containing array of object using the given schema.
/// </summary>
/// <param name="rdd">RDD containing array of object. Each array acts as a row and its items act as the columns described by <paramref name="schema"/>.</param>
/// <param name="schema">The schema of DataFrame.</param>
/// <returns>A DataFrame backed by the given RDD and schema.</returns>
public DataFrame CreateDataFrame(RDD<object[]> rdd, StructType schema)
{
    // The identity Map() forces the RDD through CSharpWorker, which pickles it
    // to RDD<byte[]>. sqlContextProxy.CreateDataFrame() below ends up calling
    // byteArrayRDDToAnyArrayRDD() in SQLUtils.scala, which only accepts
    // RDD[Array[Byte]]; its SerDeUtil.pythonToJava() runs as a mapPartitions,
    // so execution waits until the pickling completes.
    var pickledRdd = rdd.Map(r => r);
    pickledRdd.serializedMode = SerializedMode.Row;
    return new DataFrame(sparkSessionProxy.CreateDataFrame(pickledRdd.RddProxy, schema.StructTypeProxy), sparkContext);
}
// Services callback requests from the JVM side on a dedicated thread.
// Wire protocol per request (exact read order matters): command string;
// for "callback": RDD count, that many JVM object ids, batch time, a
// binary-serialized delegate, a deserializer-mode string (plus a second
// mode string and RDD for the two-RDD delegate shape). Replies 'j' + jrdd
// id when the delegate yields an RDD, 'n' otherwise.
private void ProcessCallbackRequest(object socket)
{
    logger.LogInfo("new thread created to process callback request");
    try
    {
        using (Socket sock = (Socket)socket)
        using (var s = new NetworkStream(sock))
        {
            while (true)
            {
                try
                {
                    string cmd = SerDe.ReadString(s);
                    if (cmd == "close")
                    {
                        logger.LogInfo("receive close cmd from Scala side");
                        break;
                    }
                    else if (cmd == "callback")
                    {
                        int numRDDs = SerDe.ReadInt(s);
                        var jrdds = new List<JvmObjectReference>();
                        for (int i = 0; i < numRDDs; i++)
                        {
                            jrdds.Add(new JvmObjectReference(SerDe.ReadObjectId(s)));
                        }
                        double time = SerDe.ReadDouble(s);

                        // NOTE(review): BinaryFormatter deserialization of socket data is
                        // unsafe for untrusted peers; here the peer is the co-located JVM
                        // bridge. Consider a safer serializer.
                        IFormatter formatter = new BinaryFormatter();
                        object func = formatter.Deserialize(new MemoryStream(SerDe.ReadBytes(s)));
                        string deserializer = SerDe.ReadString(s);
                        RDD<dynamic> rdd = null;
                        // a null id means the JVM side sent no RDD for this slot
                        if (jrdds[0].Id != null)
                            rdd = new RDD<dynamic>(new RDDIpcProxy(jrdds[0]), sparkContext, (SerializedMode)Enum.Parse(typeof(SerializedMode), deserializer));
                        if (func is Func<double, RDD<dynamic>, RDD<dynamic>>)
                        {
                            JvmObjectReference jrdd = (((Func<double, RDD<dynamic>, RDD<dynamic>>)func)(time, rdd).RddProxy as RDDIpcProxy).JvmRddReference;
                            SerDe.Write(s, (byte)'j');
                            SerDe.Write(s, jrdd.Id);
                        }
                        else if (func is Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>>)
                        {
                            // two-RDD delegate: a second deserializer mode and RDD follow on the wire
                            string deserializer2 = SerDe.ReadString(s);
                            RDD<dynamic> rdd2 = new RDD<dynamic>(new RDDIpcProxy(jrdds[1]), sparkContext, (SerializedMode)Enum.Parse(typeof(SerializedMode), deserializer2));
                            JvmObjectReference jrdd = (((Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>>)func)(time, rdd, rdd2).RddProxy as RDDIpcProxy).JvmRddReference;
                            SerDe.Write(s, (byte)'j');
                            SerDe.Write(s, jrdd.Id);
                        }
                        else
                        {
                            // action delegate produces no RDD; acknowledge with 'n'
                            ((Action<double, RDD<dynamic>>)func)(time, rdd);
                            SerDe.Write(s, (byte)'n');
                        }
                    }
                }
                catch (Exception e)
                {
                    //log exception only when callback socket is not shutdown explicitly
                    if (!callbackSocketShutdown)
                    {
                        logger.LogException(e);
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        logger.LogException(e);
    }
    logger.LogInfo("thread to process callback request exit");
}
// Native interop declaration that changes a radio device's power state.
// NOTE(review): `RDD` here is presumably the native radio-device descriptor
// struct (unrelated to the Spark RDD type elsewhere in this dump) — confirm
// against the native header. dwState is the target radio state and
// saveAction presumably controls whether the change persists — TODO confirm
// with the native API documentation; the DllImport attribute is outside
// this view.
private static extern uint ChangeRadioState(ref RDD pDevice, int dwState, int saveAction);
// Starts a loopback TCP listener (port 0 = OS-assigned) that serves callback
// requests from the JVM side on a background task, and returns the bound
// port. The wire protocol mirrors ProcessCallbackRequest: command string;
// for "callback": RDD count, JVM object ids, batch time, binary-serialized
// delegate, deserializer mode(s); reply 'j' + jrdd id or 'n'.
public int StartCallback()
{
    TcpListener callbackServer = new TcpListener(IPAddress.Parse("127.0.0.1"), 0);
    callbackServer.Start();
    Task.Run(() =>
    {
        try
        {
            using (Socket sock = callbackServer.AcceptSocket())
            using (var s = new NetworkStream(sock))
            {
                while (true)
                {
                    try
                    {
                        string cmd = SerDe.ReadString(s);
                        if (cmd == "close")
                        {
                            break;
                        }
                        else if (cmd == "callback")
                        {
                            int numRDDs = SerDe.ReadInt(s);
                            var jrdds = new List<JvmObjectReference>();
                            for (int i = 0; i < numRDDs; i++)
                            {
                                jrdds.Add(new JvmObjectReference(SerDe.ReadObjectId(s)));
                            }
                            double time = SerDe.ReadDouble(s);

                            // NOTE(review): BinaryFormatter deserialization of socket data
                            // is unsafe for untrusted peers; peer here is the JVM bridge.
                            IFormatter formatter = new BinaryFormatter();
                            object func = formatter.Deserialize(new MemoryStream(SerDe.ReadBytes(s)));
                            string deserializer = SerDe.ReadString(s);
                            RDD<dynamic> rdd = null;
                            // a null id means the JVM side sent no RDD for this slot
                            if (jrdds[0].Id != null)
                                rdd = new RDD<dynamic>(new RDDIpcProxy(jrdds[0]), sparkContext, (SerializedMode)Enum.Parse(typeof(SerializedMode), deserializer));
                            if (func is Func<double, RDD<dynamic>, RDD<dynamic>>)
                            {
                                JvmObjectReference jrdd = (((Func<double, RDD<dynamic>, RDD<dynamic>>)func)(time, rdd).RddProxy as RDDIpcProxy).JvmRddReference;
                                SerDe.Write(s, (byte)'j');
                                SerDe.Write(s, jrdd.Id);
                            }
                            else if (func is Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>>)
                            {
                                // two-RDD delegate: a second deserializer mode and RDD follow
                                string deserializer2 = SerDe.ReadString(s);
                                RDD<dynamic> rdd2 = new RDD<dynamic>(new RDDIpcProxy(jrdds[1]), sparkContext, (SerializedMode)Enum.Parse(typeof(SerializedMode), deserializer2));
                                JvmObjectReference jrdd = (((Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>>)func)(time, rdd, rdd2).RddProxy as RDDIpcProxy).JvmRddReference;
                                SerDe.Write(s, (byte)'j');
                                SerDe.Write(s, jrdd.Id);
                            }
                            else
                            {
                                ((Action<double, RDD<dynamic>>)func)(time, rdd);
                                SerDe.Write(s, (byte)'n');
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        // NOTE(review): a read failure (e.g. peer dropped without sending
                        // "close") is logged and the loop continues — confirm this cannot
                        // busy-loop on a dead stream.
                        logger.LogInfo(e.ToString());
                    }
                }
            }
        }
        catch (Exception e)
        {
            logger.LogInfo(e.ToString());
            throw;
        }
        finally
        {
            // always release the listener, whether the loop ended via "close" or a fault
            if (callbackServer != null)
                callbackServer.Stop();
        }
    });
    return (callbackServer.LocalEndpoint as IPEndPoint).Port;
}
// Verifies SqlContext.CreateDataFrame wires the RDD and schema through the
// mocked proxies and exposes the proxy-created DataFrame.
public void TestSqlContextCreateDataFrame()
{
    // arrange
    var contextProxy = new Mock<ISparkContextProxy>();
    contextProxy.Setup(m => m.CreateCSharpRdd(It.IsAny<IRDDProxy>(), It.IsAny<byte[]>(), It.IsAny<Dictionary<string, string>>(),
        It.IsAny<List<string>>(), It.IsAny<bool>(), It.IsAny<List<Broadcast>>(), It.IsAny<List<byte[]>>()));

    var rddProxy = new Mock<IRDDProxy>();
    var rdd = new RDD<object[]>(rddProxy.Object, new SparkContext(contextProxy.Object, new SparkConf()));

    var dataFrameProxy = new DataFrameIpcProxy(new JvmObjectReference("1"), mockSqlContextProxy.Object);
    mockSqlContextProxy.Setup(m => m.CreateDataFrame(It.IsAny<IRDDProxy>(), It.IsAny<IStructTypeProxy>())).Returns(dataFrameProxy);

    var sqlContext = new SqlContext(new SparkContext("", ""), mockSqlContextProxy.Object);

    const string schemaJson = @"{ ""fields"": [{ ""metadata"": {}, ""name"": ""guid"", ""nullable"": false, ""type"": ""string"" }], ""type"": ""struct"" }";
    var structTypeProxy = new Mock<IStructTypeProxy>();
    structTypeProxy.Setup(m => m.ToJson()).Returns(schemaJson);

    // act
    var dataFrame = sqlContext.CreateDataFrame(rdd, new StructType(structTypeProxy.Object));

    // assert
    Assert.AreEqual(dataFrameProxy, dataFrame.DataFrameProxy);
}
// Verifies SqlContext.CreateDataFrame with a complex JSON schema wires the
// RDD and schema through the mocked proxies.
public void TestSqlContextCreateDataFrame()
{
    // arrange
    var contextProxy = new Mock<ISparkContextProxy>();
    contextProxy.Setup(m => m.CreateCSharpRdd(It.IsAny<IRDDProxy>(), It.IsAny<byte[]>(), It.IsAny<Dictionary<string, string>>(),
        It.IsAny<List<string>>(), It.IsAny<bool>(), It.IsAny<List<Broadcast>>(), It.IsAny<List<byte[]>>()));

    var rddProxy = new Mock<IRDDProxy>();
    var rdd = new RDD<object[]>(rddProxy.Object, new SparkContext(contextProxy.Object, new SparkConf()));

    var dataFrameProxy = new DataFrameIpcProxy(new JvmObjectReference("1"), mockSqlContextProxy.Object);
    mockSqlContextProxy.Setup(m => m.CreateDataFrame(It.IsAny<IRDDProxy>(), It.IsAny<IStructTypeProxy>())).Returns(dataFrameProxy);

    var sqlContext = new SqlContext(new SparkContext("", ""), mockSqlContextProxy.Object);

    var structTypeProxy = new Mock<IStructTypeProxy>();
    structTypeProxy.Setup(m => m.ToJson()).Returns(RowHelper.ComplexJsonSchema);

    // act
    var dataFrame = sqlContext.CreateDataFrame(rdd, new StructType(structTypeProxy.Object));

    // assert
    Assert.AreEqual(dataFrameProxy, dataFrame.DataFrameProxy);
}
// Test fixture setup: builds the word RDD by tokenizing a temp file.
public static void Initialize()
{
    var sc = new SparkContext(null);
    var lines = sc.TextFile(Path.GetTempFileName());
    words = lines.FlatMap(line => line.Split(' '));
}
// Verifies Cache flips the RDD's IsCached flag.
public void TestRddCache()
{
    var baseProxy = new Mock<IRDDProxy>();
    baseProxy.Setup(m => m.Cache());

    var rdd = new RDD<string> { rddProxy = baseProxy.Object };
    Assert.IsFalse(rdd.IsCached);

    var cachedRdd = rdd.Cache();
    Assert.IsTrue(cachedRdd.IsCached);
}
// Verifies Persist sets IsCached and Unpersist clears it again.
public void TestRddPersistAndUnPersist()
{
    var baseProxy = new Mock<IRDDProxy>();
    baseProxy.Setup(m => m.Persist(It.IsAny<StorageLevelType>()));

    var rdd = new RDD<string> { rddProxy = baseProxy.Object };
    Assert.IsFalse(rdd.IsCached);

    // test persist
    var persistedRdd = rdd.Persist(StorageLevelType.MEMORY_AND_DISK);
    Assert.IsNotNull(persistedRdd);
    Assert.IsTrue(persistedRdd.IsCached);

    // test unpersist
    baseProxy.Setup(m => m.Unpersist());
    var unPersistedRdd = persistedRdd.Unpersist();
    Assert.IsNotNull(unPersistedRdd);
    Assert.IsFalse(unPersistedRdd.IsCached);
}
// Services callback requests from the JVM side on a dedicated thread.
// Wire protocol per request (exact read order matters): command string;
// for "callback": RDD count, JVM object ids, batch time, binary-serialized
// delegate, serialization mode string(s). This variant casts the delegate's
// result to PipelinedRDD<dynamic> before extracting the JVM RDD reference.
// Replies 'j' + jrdd id when the delegate yields an RDD, 'n' otherwise.
private void ProcessCallbackRequest(object socket)
{
    logger.LogInfo("new thread created to process callback request");
    try
    {
        using (Socket sock = (Socket)socket)
        using (var s = new NetworkStream(sock))
        {
            while (true)
            {
                try
                {
                    string cmd = SerDe.ReadString(s);
                    if (cmd == "close")
                    {
                        logger.LogInfo("receive close cmd from Scala side");
                        break;
                    }
                    else if (cmd == "callback")
                    {
                        int numRDDs = SerDe.ReadInt(s);
                        var jrdds = new List<JvmObjectReference>();
                        for (int i = 0; i < numRDDs; i++)
                        {
                            jrdds.Add(new JvmObjectReference(SerDe.ReadObjectId(s)));
                        }
                        double time = SerDe.ReadDouble(s);

                        // NOTE(review): BinaryFormatter deserialization of socket data is
                        // unsafe for untrusted peers; here the peer is the co-located JVM
                        // bridge. Consider a safer serializer.
                        IFormatter formatter = new BinaryFormatter();
                        object func = formatter.Deserialize(new MemoryStream(SerDe.ReadBytes(s)));
                        string serializedMode = SerDe.ReadString(s);
                        RDD<dynamic> rdd = null;
                        // a null id means the JVM side sent no RDD for this slot
                        if (jrdds[0].Id != null)
                        {
                            rdd = new RDD<dynamic>(new RDDIpcProxy(jrdds[0]), sparkContext, (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode));
                        }
                        if (func is Func<double, RDD<dynamic>, RDD<dynamic>>)
                        {
                            JvmObjectReference jrdd = ((((Func<double, RDD<dynamic>, RDD<dynamic>>)func)(time, rdd) as PipelinedRDD<dynamic>).RddProxy as RDDIpcProxy).JvmRddReference;
                            SerDe.Write(s, (byte)'j');
                            SerDe.Write(s, jrdd.Id);
                        }
                        else if (func is Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>>)
                        {
                            // two-RDD delegate: a second serialized mode and RDD follow on the wire
                            string serializedMode2 = SerDe.ReadString(s);
                            RDD<dynamic> rdd2 = new RDD<dynamic>(new RDDIpcProxy(jrdds[1]), sparkContext, (SerializedMode)Enum.Parse(typeof(SerializedMode), serializedMode2));
                            JvmObjectReference jrdd = ((((Func<double, RDD<dynamic>, RDD<dynamic>, RDD<dynamic>>)func)(time, rdd, rdd2) as PipelinedRDD<dynamic>).RddProxy as RDDIpcProxy).JvmRddReference;
                            SerDe.Write(s, (byte)'j');
                            SerDe.Write(s, jrdd.Id);
                        }
                        else
                        {
                            // action delegate produces no RDD; acknowledge with 'n'
                            ((Action<double, RDD<dynamic>>)func)(time, rdd);
                            SerDe.Write(s, (byte)'n');
                        }
                    }
                }
                catch (Exception e)
                {
                    //log exception only when callback socket is not shutdown explicitly
                    if (!callbackSocketShutdown)
                    {
                        logger.LogException(e);
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        logger.LogException(e);
    }
    logger.LogInfo("thread to process callback request exit");
}