public void TestSparkContextConstructor() { var sparkContext = new SparkContext("masterUrl", "appName"); Assert.IsNotNull((sparkContext.SparkContextProxy as MockSparkContextProxy).mockSparkContextReference); var paramValuesToConstructor = (sparkContext.SparkContextProxy as MockSparkContextProxy).mockSparkContextReference as object[]; Assert.AreEqual("masterUrl", (paramValuesToConstructor[0] as MockSparkConfProxy).stringConfDictionary["mockmaster"]); Assert.AreEqual("appName", (paramValuesToConstructor[0] as MockSparkConfProxy).stringConfDictionary["mockappName"]); Assert.AreEqual(sparkContext, SparkContext.GetActiveSparkContext()); sparkContext = new SparkContext("masterUrl", "appName", "sparkhome"); Assert.IsNotNull((sparkContext.SparkContextProxy as MockSparkContextProxy).mockSparkContextReference); paramValuesToConstructor = (sparkContext.SparkContextProxy as MockSparkContextProxy).mockSparkContextReference as object[]; Assert.AreEqual("masterUrl", (paramValuesToConstructor[0] as MockSparkConfProxy).stringConfDictionary["mockmaster"]); Assert.AreEqual("appName", (paramValuesToConstructor[0] as MockSparkConfProxy).stringConfDictionary["mockappName"]); Assert.AreEqual("sparkhome", (paramValuesToConstructor[0] as MockSparkConfProxy).stringConfDictionary["mockhome"]); Assert.AreEqual(sparkContext, SparkContext.GetActiveSparkContext()); sparkContext = new SparkContext(null); Assert.IsNotNull((sparkContext.SparkContextProxy as MockSparkContextProxy).mockSparkContextReference); paramValuesToConstructor = (sparkContext.SparkContextProxy as MockSparkContextProxy).mockSparkContextReference as object[]; Assert.IsNotNull(paramValuesToConstructor[0]); //because the SparkContext constructor creates a default SparkConf Assert.AreEqual(sparkContext, SparkContext.GetActiveSparkContext()); }
public static int Main(string[] args) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set Logger = LoggerServiceFactory.GetLogger(typeof(WordCountExample)); if (args.Length != 1) { Console.Error.WriteLine("Usage: WordCount <file>"); return(1); } var sparkContext = new SparkContext(new SparkConf().SetAppName("MobiusWordCount")); try { var lines = sparkContext.TextFile(args[0]); var counts = lines .FlatMap(x => x.Split(' ')) .Map(w => new Tuple <string, int>(w, 1)) .ReduceByKey((x, y) => x + y); foreach (var wordcount in counts.Collect()) { Console.WriteLine("{0}: {1}", wordcount.Item1, wordcount.Item2); } } catch (Exception ex) { Logger.LogError("Error performing Word Count"); Logger.LogException(ex); } sparkContext.Stop(); return(0); }
public void TestCSharpInputDStream() { // test create CSharpInputDStream var sc = new SparkContext("", ""); var ssc = new StreamingContext(sc, 1000L); Func<double, int, IEnumerable<string>> func = (double time, int pid) => { var list = new List<string>() { string.Format("PluggableInputDStream-{0}-{1}", pid, time) }; return list.AsEnumerable(); }; const int numPartitions = 5; var inputDStream = CSharpInputDStreamUtils.CreateStream<string>( ssc, numPartitions, func); Assert.IsNotNull(inputDStream); Assert.AreEqual(ssc, inputDStream.streamingContext); // test CSharpInputDStreamMapPartitionWithIndexHelper int[] array = new int[numPartitions]; int partitionIndex = 0; new CSharpInputDStreamMapPartitionWithIndexHelper<string>(0.0, func).Execute(partitionIndex, array.AsEnumerable()); // test CSharpInputDStreamGenerateRDDHelper new CSharpInputDStreamGenerateRDDHelper<string>(numPartitions, func).Execute(0.0); }
static void Main(string[] args) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set var logger = LoggerServiceFactory.GetLogger(typeof(SparkXmlExample)); var inputXmlFilePath = args[0]; var outputXmlFilePath = args[1]; var sparkConf = new SparkConf(); sparkConf.SetAppName("myapp"); var sparkContext = new SparkContext(sparkConf); var sqlContext = new SqlContext(sparkContext); var df = sqlContext.Read() .Format("com.databricks.spark.xml") .Option("rowTag", "book") .Load(inputXmlFilePath); //"D:\temp\books.xml", "file:/D:/temp/books.xml" or "hdfs://temp/books.xml" df.ShowSchema(); var rowCount = df.Count(); logger.LogInfo("Row count is " + rowCount); var selectedData = df.Select("author", "@id"); selectedData.Write() .Format("com.databricks.spark.xml") .Option("rootTag", "books") .Option("rowTag", "book") .Save(outputXmlFilePath); //"D:\temp\booksUpdated.xml", "file:/D:/temp/booksUpdated.xml" or "hdfs://temp/booksUpdated.xml" sparkContext.Stop(); }
internal UserDefinedFunction(Func <int, IEnumerable <dynamic>, IEnumerable <dynamic> > func) { udfProxy = SparkCLREnvironment.SparkCLRProxy.SparkContextProxy.CreateUserDefinedCSharpFunction( func.GetType().Name, SparkContext.BuildCommand(func, SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); }
public void TestDropWithAny() { // arrange const string columnName = "column1"; var mockSchemaProxy = new Mock<IStructTypeProxy>(); var mockFieldProxy = new Mock<IStructFieldProxy>(); mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockSchemaProxy.Object); mockSchemaProxy.Setup(m => m.GetStructTypeFields()).Returns(new List<IStructFieldProxy> { mockFieldProxy.Object }); mockFieldProxy.Setup(m => m.GetStructFieldName()).Returns(columnName); var sparkContext = new SparkContext("", ""); mockDataFrameNaFunctionsProxy.Setup(m => m.Drop(It.IsAny<int>(), It.IsAny<string[]>())).Returns(mockDataFrameProxy.Object); var dataFrame = new DataFrame(mockDataFrameProxy.Object, sparkContext); var f = new DataFrameNaFunctions(mockDataFrameNaFunctionsProxy.Object, dataFrame, sparkContext); // act var cols = new[] { "col1", "col2" }; var df1 = f.Drop("any", cols); var df2 = f.Drop(); var df3 = f.Drop("any"); // verify Assert.IsNotNull(df1); Assert.AreEqual(df1.DataFrameProxy, dataFrame.DataFrameProxy); mockDataFrameNaFunctionsProxy.Verify(m => m.Drop(cols.Length, cols), Times.Once); Assert.IsNotNull(df2); Assert.AreEqual(df2.DataFrameProxy, dataFrame.DataFrameProxy); Assert.IsNotNull(df3); Assert.AreEqual(df3.DataFrameProxy, dataFrame.DataFrameProxy); mockDataFrameNaFunctionsProxy.Verify(m => m.Drop(1, new[] { columnName }), Times.Exactly(2)); }
/// <summary> /// Register UDF with 5 input arguments, e.g.: /// SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg5) => arg1 != null && arg2 != null && ... && arg5 != null); /// sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName5)"); /// </summary> /// <typeparam name="RT"></typeparam> /// <typeparam name="A1"></typeparam> /// <typeparam name="A2"></typeparam> /// <typeparam name="A3"></typeparam> /// <typeparam name="A4"></typeparam> /// <typeparam name="A5"></typeparam> /// <param name="name"></param> /// <param name="f"></param> public void RegisterFunction <RT, A1, A2, A3, A4, A5>(string name, Func <A1, A2, A3, A4, A5, RT> f) { logger.LogInfo("Name of the function to register {0}, method info {1}", name, f.Method); Func <int, IEnumerable <dynamic>, IEnumerable <dynamic> > udfHelper = new UdfHelper <RT, A1, A2, A3, A4, A5>(f).Execute; sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(typeof(RT))); }
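// A minimal usage sketch for the 5-argument overload above, assuming a SqlContext instance named
// sqlContext and a registered temp table called MyTable (both names are illustrative, echoing the
// doc comment; they are not defined in the surrounding code):
sqlContext.RegisterFunction<bool, string, string, string, string, string>(
    "MyFilter",
    (arg1, arg2, arg3, arg4, arg5) =>
        arg1 != null && arg2 != null && arg3 != null && arg4 != null && arg5 != null);
var filtered = sqlContext.Sql(
    "SELECT * FROM MyTable WHERE MyFilter(columnName1, columnName2, columnName3, columnName4, columnName5)");
filtered.Show(); // prints only the rows for which all five columns are non-null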
public static void Main(string[] args) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set Logger = LoggerServiceFactory.GetLogger(typeof(PiExample)); var sparkContext = new SparkContext(new SparkConf()); try { const int slices = 3; var numberOfItems = (int)Math.Min(100000L * slices, int.MaxValue); var values = new List <int>(numberOfItems); for (var i = 0; i < numberOfItems; i++) { values.Add(i); } var rdd = sparkContext.Parallelize(values, slices); CalculatePiUsingAnonymousMethod(numberOfItems, rdd); CalculatePiUsingSerializedClassApproach(numberOfItems, rdd); Logger.LogInfo("Completed calculating the value of Pi"); } catch (Exception ex) { Logger.LogError("Error calculating Pi"); Logger.LogException(ex); } sparkContext.Stop(); }
public void TestSparkContextProxy() { var sparkContext = new SparkContext(Env.SPARK_MASTER_URL, "appName"); sparkContext.AddFile(null); sparkContext.BinaryFiles(null, null); sparkContext.CancelAllJobs(); sparkContext.CancelJobGroup(null); sparkContext.EmptyRDD <string>(); sparkContext.GetLocalProperty(null); sparkContext.HadoopFile(null, null, null, null); sparkContext.HadoopRDD(null, null, null); sparkContext.NewAPIHadoopFile(null, null, null, null); sparkContext.NewAPIHadoopRDD(null, null, null); sparkContext.Parallelize <int>(new int[] { 1, 2, 3, 4, 5 }); sparkContext.SequenceFile(null, null, null, null, null, null); sparkContext.SetCheckpointDir(null); sparkContext.SetJobGroup(null, null); sparkContext.SetLocalProperty(null, null); sparkContext.SetLogLevel(null); sparkContext.TextFile(null); sparkContext.WholeTextFiles(null); sparkContext.Stop(); sparkContext.Union <string>(null); }
public static void Initialize() { var sparkContext = new SparkContext(null); var lines = sparkContext.TextFile(Path.GetTempFileName()); words = lines.FlatMap(l => l.Split(' ')); }
/// <summary> /// Reads the data from the XML file and retrieves the rows /// </summary> private static void SparkXml() { var sparkConf = new SparkConf(); sparkConf.SetMaster("yarn"); sparkConf.SetAppName("SparkXmlMobius"); sparkContext = new SparkContext(sparkConf); var sqlContext = new SqlContext(sparkContext); var dataframe = sqlContext.Read() .Format("com.databricks.spark.xml") .Option("rowTag", "book") .Load(inputXmlFilePath); var rowCount = dataframe.Count(); logger.LogInfo("****Row count is " + rowCount + "****"); var rowCollections = dataframe.Collect(); logger.LogInfo("**********************************************"); foreach (var row in rowCollections) { Console.WriteLine("{0}", row); } logger.LogInfo("*********************************************"); logger.LogInfo("Executed successfully"); }
public void TestDropWithCols() { // arrange var sparkContext = new SparkContext("", ""); mockDataFrameNaFunctionsProxy.Setup(m => m.Drop(It.IsAny<int>(), It.IsAny<string[]>())).Returns(mockDataFrameProxy.Object); var dataFrame = new DataFrame(mockDataFrameProxy.Object, sparkContext); var f = new DataFrameNaFunctions(mockDataFrameNaFunctionsProxy.Object, dataFrame, sparkContext); // act var cols = new[] { "col1", "col2" }; var df = f.Drop(cols); // verify Assert.IsNotNull(df); Assert.AreEqual(df.DataFrameProxy, dataFrame.DataFrameProxy); Assert.AreNotSame(dataFrame, df); mockDataFrameNaFunctionsProxy.Verify(m => m.Drop(cols.Length, cols), Times.Once); mockDataFrameNaFunctionsProxy.Reset(); df = f.Drop(new string[] { }); Assert.AreSame(dataFrame, df); mockDataFrameNaFunctionsProxy.Verify(m => m.Drop(It.IsAny<int>(), It.IsAny<string[]>()), Times.Never); }
public void TestReplaceWithColumns() { // arrange var sparkContext = new SparkContext("", ""); mockDataFrameNaFunctionsProxy.Setup(m => m.Replace(It.IsAny <string[]>(), It.IsAny <Dictionary <string, string> >())) .Returns(mockDataFrameProxy.Object); // act var replacement = new Dictionary <string, string>() { { "", "unknown" }, { "?", "unknown" } }; var dataFrame = new DataFrame(mockDataFrameProxy.Object, sparkContext); var f = new DataFrameNaFunctions(mockDataFrameNaFunctionsProxy.Object, dataFrame, sparkContext); var cols = new[] { "col1", "col2" }; var df = f.Replace(cols, replacement); // verify Assert.IsNotNull(df); Assert.AreEqual(df.DataFrameProxy, dataFrame.DataFrameProxy); Assert.AreNotSame(dataFrame, df); mockDataFrameNaFunctionsProxy.Verify(m => m.Replace(cols, replacement), Times.Once); }
public void TestDropWithMinNonNulls() { const string columnName = "column1"; var mockSchemaProxy = new Mock <IStructTypeProxy>(); var mockFieldProxy = new Mock <IStructFieldProxy>(); mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockSchemaProxy.Object); mockSchemaProxy.Setup(m => m.GetStructTypeFields()).Returns(new List <IStructFieldProxy> { mockFieldProxy.Object }); mockFieldProxy.Setup(m => m.GetStructFieldName()).Returns(columnName); var sparkContext = new SparkContext("", ""); mockDataFrameNaFunctionsProxy.Setup(m => m.Drop(It.IsAny <int>(), It.IsAny <string[]>())).Returns(mockDataFrameProxy.Object); var dataFrame = new DataFrame(mockDataFrameProxy.Object, sparkContext); var f = new DataFrameNaFunctions(mockDataFrameNaFunctionsProxy.Object, dataFrame, sparkContext); var df = f.Drop(20); Assert.IsNotNull(df); Assert.AreEqual(df.DataFrameProxy, dataFrame.DataFrameProxy); Assert.AreNotSame(dataFrame, df); mockDataFrameNaFunctionsProxy.Verify(m => m.Drop(20, new[] { columnName }), Times.Once); }
public void TestHadoopRDD() { // Arrange const string keyClass = "java.lang.Long"; const string valueClass = "java.lang.String"; const string keyConverterClass = "xyz.KeyConverter"; const string valueConverterClass = "xyz.valueConverter"; Mock <IRDDProxy> rddProxy = new Mock <IRDDProxy>(); Mock <ISparkContextProxy> sparkContextProxy = new Mock <ISparkContextProxy>(); sparkContextProxy.Setup(m => m.HadoopRDD(It.IsAny <string>(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny <IEnumerable <KeyValuePair <string, string> > >(), It.IsAny <int>())) .Returns(rddProxy.Object); SparkContext sc = new SparkContext(sparkContextProxy.Object, null); const string inputFormatClass = "org.apache.hadoop.mapreduce.lib.input.TextInputFormat"; var conf = new KeyValuePair <string, string>[] { }; // Act RDD <byte[]> rdd = sc.HadoopRDD(inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass, conf); // Assert Assert.IsNotNull(rdd); Assert.AreEqual(rddProxy.Object, rdd.RddProxy); Assert.AreEqual(sc, rdd.sparkContext); Assert.AreEqual(SerializedMode.None, rdd.serializedMode); }
public static int Main(string[] args) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set Logger = LoggerServiceFactory.GetLogger(typeof(WordCountExample)); if (args.Length != 1) { Console.Error.WriteLine("Usage: WordCount <file>"); return 1; } var sparkContext = new SparkContext(new SparkConf().SetAppName("MobiusWordCount")); try { var lines = sparkContext.TextFile(args[0]); var counts = lines .FlatMap(x => x.Split(' ')) .Map(w => new KeyValuePair<string, int>(w, 1)) .ReduceByKey((x, y) => x + y); foreach (var wordcount in counts.Collect()) { Console.WriteLine("{0}: {1}", wordcount.Key, wordcount.Value); } } catch (Exception ex) { Logger.LogError("Error performing Word Count"); Logger.LogException(ex); } sparkContext.Stop(); return 0; }
public static void Initialize(TestContext context) { var sparkContext = new SparkContext(null); var lines = sparkContext.TextFile(Path.GetTempFileName()); var words = lines.FlatMap(l => l.Split(' ')); doubles = words.Map(w => new KeyValuePair<string, int>(w, 1)).ReduceByKey((x, y) => x + y).Map(kv => (double)kv.Value); }
public static void Initialize() { var sparkContext = new SparkContext(null); var lines = sparkContext.TextFile(Path.GetTempFileName()); var words = lines.FlatMap(l => l.Split(' ')); pairs = words.Map(w => new KeyValuePair<string, int>(w, 1)); }
public void TestSignaturesV2_3_X() { SparkContext sc = SparkContext.GetOrCreate(new SparkConf()); Assert.IsType <SparkConf>(sc.GetConf()); Assert.IsType <int>(sc.DefaultParallelism); sc.SetJobDescription("job description"); sc.SetLogLevel("ALL"); sc.SetLogLevel("debug"); Assert.Throws <Exception>(() => sc.SetLogLevel("INVALID")); sc.SetJobGroup("group id", "description"); sc.SetJobGroup("group id", "description", true); sc.ClearJobGroup(); string filePath = $"{TestEnvironment.ResourceDirectory}people.txt"; sc.AddFile(filePath); sc.AddFile(filePath, true); using var tempDir = new TemporaryDirectory(); sc.SetCheckpointDir(tempDir.Path); Assert.IsType <Configuration>(sc.HadoopConfiguration()); }
public void TestSequenceFiles() { // Arrange const string filePath = @"hdfs://path/to/files"; const int defaultParallelism = 10; const string keyClass = "java.lang.Long"; const string valueClass = "java.lang.String"; const string keyConverterClass = "xyz.KeyConverter"; const string valueConverterClass = "xyz.valueConverter"; Mock <IRDDProxy> rddProxy = new Mock <IRDDProxy>(); Mock <ISparkContextProxy> sparkContextProxy = new Mock <ISparkContextProxy>(); sparkContextProxy.Setup(m => m.SequenceFile(filePath, keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny <int>(), It.IsAny <int>())) .Returns(rddProxy.Object); sparkContextProxy.Setup(m => m.DefaultParallelism).Returns(defaultParallelism); SparkContext sc = new SparkContext(sparkContextProxy.Object, null); // Act RDD <byte[]> rdd = sc.SequenceFile(filePath, keyClass, valueClass, keyConverterClass, valueConverterClass, null); // Assert Assert.IsNotNull(rdd); Assert.AreEqual(rddProxy.Object, rdd.RddProxy); Assert.AreEqual(sc, rdd.sparkContext); Assert.AreEqual(SerializedMode.None, rdd.serializedMode); }
public void TestNewAPIHadoopFile() { // Arrange const string filePath = @"hdfs://path/to/files"; const string keyClass = "java.lang.Long"; const string valueClass = "java.lang.String"; const string keyConverterClass = "xyz.KeyConverter"; const string valueConverterClass = "xyz.valueConverter"; Mock <IRDDProxy> rddProxy = new Mock <IRDDProxy>(); Mock <ISparkContextProxy> sparkContextProxy = new Mock <ISparkContextProxy>(); sparkContextProxy.Setup(m => m.NewAPIHadoopFile(filePath, It.IsAny <string>(), keyClass, valueClass, keyConverterClass, valueConverterClass, It.IsAny <IEnumerable <Tuple <string, string> > >(), It.IsAny <int>())) .Returns(rddProxy.Object); SparkContext sc = new SparkContext(sparkContextProxy.Object, null); const string inputFormatClass = "org.apache.hadoop.mapreduce.lib.input.TextInputFormat"; // Act RDD <byte[]> rdd = sc.NewAPIHadoopFile(filePath, inputFormatClass, keyClass, valueClass, keyConverterClass, valueConverterClass); // Assert Assert.IsNotNull(rdd); Assert.AreEqual(rddProxy.Object, rdd.RddProxy); Assert.AreEqual(sc, rdd.sparkContext); Assert.AreEqual(SerializedMode.None, rdd.serializedMode); }
public IEnumerable<Model> Get() { _sparkContext = Program.SparkContext; var crimeDataFrame = GetSqlContext() .TextFile(CrimeFilePath) .Cache(); var tempRdd = _sparkContext.TextFile(CrimeFilePath) .Map(l => new object[] { int.Parse(l.Substring(0, 3)), int.Parse(l.Substring(4, 3)), int.Parse(l.Substring(8, 4)), }); var data = GetSqlContext().CreateDataFrame(tempRdd, new StructType(new List<StructField> { new StructField("Field1", new IntegerType()), new StructField("Field2", new IntegerType()), new StructField("Field3", new IntegerType()) })); data.Show(); data.RegisterTempTable("data"); return GetSqlContext().Sql("SELECT Field1, Field2, Field3 FROM data") .Collect() .Select(l => new Model { Field1 = l.Get("Field1"), Field2 = l.Get("Field2"), Field3 = l.Get("Field3"), }).ToList(); }
static void Main(string[] args) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set Logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRSamples)); Configuration = CommandlineArgumentProcessor.ProcessArugments(args); PrintLogLocation(); bool status = true; if (Configuration.IsDryrun) { status = SamplesRunner.RunSamples(); } else { SparkContext = CreateSparkContext(); SparkContext.SetCheckpointDir(Path.GetTempPath()); status = SamplesRunner.RunSamples(); PrintLogLocation(); ConsoleWriteLine("Completed running samples. Calling SparkContext.Stop() to tear down ..."); //the following message is necessary due to a known issue in Spark. See https://issues.apache.org/jira/browse/SPARK-8333 ConsoleWriteLine("If this program (SparkCLRSamples.exe) does not terminate in 10 seconds, please manually terminate the java process launched by this program!!!"); //TODO - add instructions to terminate java process SparkContext.Stop(); } if (Configuration.IsValidationEnabled && !status) { Environment.Exit(1); } }
public void TestSparkContextStop() { var sparkContext = new SparkContext(null); Assert.IsNotNull((sparkContext.SparkContextProxy as MockSparkContextProxy).mockSparkContextReference); sparkContext.Stop(); Assert.IsNull((sparkContext.SparkContextProxy as MockSparkContextProxy).mockSparkContextReference); }
public void TestWorkerWithDynamicLibrary() { var originalRunMode = Environment.GetEnvironmentVariable("SPARKCLR_RUN_MODE"); var originalCompilationDir = Environment.GetEnvironmentVariable("SPARKCLR_SCRIPT_COMPILATION_DIR"); var compilationDir = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); Directory.CreateDirectory(compilationDir); // copy dll var currentDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); File.Copy(Path.Combine(currentDir, "Microsoft.Spark.CSharp.Adapter.dll"), Path.Combine(compilationDir, "ReplCompilation.1")); try { Environment.SetEnvironmentVariable("SPARKCLR_RUN_MODE", "R"); Process worker; var CSharpRDD_SocketServer = CreateServer(out worker); using (var serverSocket = CSharpRDD_SocketServer.Accept()) using (var s = serverSocket.GetStream()) { WritePayloadHeaderToWorker(s); Environment.SetEnvironmentVariable("SPARKCLR_SCRIPT_COMPILATION_DIR", compilationDir); byte[] commandWithDynamicLibraryPath = SparkContext.BuildCommand(new CSharpWorkerFunc((pid, iter) => iter), SerializedMode.String, SerializedMode.String); SerDe.Write(s, commandWithDynamicLibraryPath.Length); SerDe.Write(s, commandWithDynamicLibraryPath); for (int i = 0; i < 100; i++) { SerDe.Write(s, i.ToString()); } SerDe.Write(s, (int)SpecialLengths.END_OF_DATA_SECTION); SerDe.Write(s, (int)SpecialLengths.END_OF_STREAM); s.Flush(); int count = 0; foreach (var bytes in ReadWorker(s)) { Assert.AreEqual(count++.ToString(), Encoding.UTF8.GetString(bytes)); } Assert.AreEqual(100, count); } AssertWorker(worker); CSharpRDD_SocketServer.Close(); } finally { Environment.SetEnvironmentVariable("SPARKCLR_RUN_MODE", originalRunMode); Environment.SetEnvironmentVariable("SPARKCLR_SCRIPT_COMPILATION_DIR", originalCompilationDir); Directory.Delete(compilationDir, true); } }
internal static void DStreamTextFileSamples() { count = 0; string directory = SparkCLRSamples.Configuration.SampleDataLocation; string checkpointPath = Path.Combine(directory, "checkpoint"); SparkContext sc = SparkCLRSamples.SparkContext; var b = sc.Broadcast <int>(0); StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath, () => { StreamingContext context = new StreamingContext(sc, 2000); context.Checkpoint(checkpointPath); var lines = context.TextFileStream(Path.Combine(directory, "test")); lines = context.Union(lines, lines); var words = lines.FlatMap(l => l.Split(' ')); var pairs = words.Map(w => new KeyValuePair <string, int>(w, 1)); // since operations like ReduceByKey, Join and UpdateStateByKey are // separate dstream transformations defined in CSharpDStream.scala // an extra CSharpRDD is introduced in between these operations var wordCounts = pairs.ReduceByKey((x, y) => x + y); var join = wordCounts.Join(wordCounts, 2); var state = join.UpdateStateByKey <string, Tuple <int, int>, int>(new UpdateStateHelper(b).Execute); state.ForeachRDD((time, rdd) => { // there's a chance rdd.Take conflicts with ssc.Stop if (stopFileServer) { return; } object[] taken = rdd.Take(10); Console.WriteLine("-------------------------------------------"); Console.WriteLine("Time: {0}", time); Console.WriteLine("-------------------------------------------"); foreach (object record in taken) { Console.WriteLine(record); } Console.WriteLine(); stopFileServer = count++ > 100; }); return(context); }); ssc.Start(); StartFileServer(directory, "words.txt", 100); ssc.AwaitTermination(); ssc.Stop(); }
public void TestRdd() { const string jsonSchema = @" { ""type"" : ""struct"", ""fields"" : [{ ""name"" : ""age"", ""type"" : ""long"", ""nullable"" : true, ""metadata"" : { } }, { ""name"" : ""id"", ""type"" : ""string"", ""nullable"" : true, ""metadata"" : { } }, { ""name"" : ""name"", ""type"" : ""string"", ""nullable"" : true, ""metadata"" : { } } ] }"; Mock <IStructTypeProxy> mockStructTypeProxy = new Mock <IStructTypeProxy>(); mockStructTypeProxy.Setup(m => m.ToJson()).Returns(jsonSchema); mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockStructTypeProxy.Object); var rows = new object[] { new RowImpl(new object[] { 34, "123", "Bill" }, RowSchema.ParseRowSchemaFromJson(jsonSchema)) }; mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(new MockRddProxy(rows)); var sc = new SparkContext(null); var dataFrame = new DataFrame(mockDataFrameProxy.Object, sc); // act var rdd = dataFrame.Rdd; Assert.IsNotNull(rdd); mockDataFrameProxy.Verify(m => m.JavaToCSharp(), Times.Once); mockStructTypeProxy.Verify(m => m.ToJson(), Times.Once); mockDataFrameProxy.Reset(); mockStructTypeProxy.Reset(); rdd = dataFrame.Rdd; Assert.IsNotNull(rdd); mockDataFrameProxy.Verify(m => m.JavaToCSharp(), Times.Never); mockStructTypeProxy.Verify(m => m.ToJson(), Times.Never); }
/// <summary> /// Get the existing SQLContext or create a new one with given SparkContext. /// </summary> /// <param name="sparkContext"></param> /// <returns></returns> public static SqlContext GetOrCreate(SparkContext sparkContext) { if (instance == null) { return(new SqlContext(sparkContext)); } return(instance); }
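// A minimal sketch of the singleton behavior implemented above, assuming an existing
// SparkContext named sparkContext: the first call constructs the SqlContext (whose
// constructor caches it as the shared instance), and every later call returns that
// same cached instance.
var first = SqlContext.GetOrCreate(sparkContext);
var second = SqlContext.GetOrCreate(sparkContext);
// first and second refer to the same SqlContext, so state such as registered
// temp tables and UDFs is shared between them.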
public void RegisterFunction(string name, MethodInfo f) { logger.LogInfo("Name of the function to register {0}, method info {1}", name, f.DeclaringType?.FullName + "." + f.Name); var helper = new UdfReflectionHelper(f); Func <int, IEnumerable <dynamic>, IEnumerable <dynamic> > udfHelper = helper.Execute; sqlContextProxy.RegisterFunction(name, SparkContext.BuildCommand(new CSharpWorkerFunc(udfHelper), SerializedMode.Row, SerializedMode.Row), Functions.GetReturnType(helper.ReturnType)); }
public static void Initialize() { var sparkContext = new SparkContext(null); var lines = sparkContext.TextFile(Path.GetTempFileName()); var words = lines.FlatMap(l => l.Split(' ')); doubles = words.Map(w => new KeyValuePair <string, int>(w, 1)).ReduceByKey((x, y) => x + y).Map(kv => (double)kv.Value); }
public void TestSparkContextTextFile() { var sparkContext = new SparkContext(null); var rdd = sparkContext.TextFile(@"c:\path\to\rddinput.txt", 8); var paramValuesToTextFileMethod = (rdd.RddProxy as MockRddProxy).mockRddReference as object[]; Assert.AreEqual(@"c:\path\to\rddinput.txt", paramValuesToTextFileMethod[0]); Assert.AreEqual(8, paramValuesToTextFileMethod[1]); }
public void TestRddTextFile() { var sparkContext = new SparkContext(null); var rdd = sparkContext.TextFile(@"c:\path\to\rddinput.txt"); var paramValuesToTextFileMethod = (rdd.RddProxy as MockRddProxy).mockRddReference as object[]; Assert.AreEqual(@"c:\path\to\rddinput.txt", paramValuesToTextFileMethod[0]); Assert.AreEqual(0, int.Parse(paramValuesToTextFileMethod[1].ToString())); //checking default partitions }
private static void InitializeSparkContext(string[] args) { var sparkConf = new SparkConf(); sparkConf.Set("spark.local.dir", args[0]); sparkConf.SetAppName("SparkCLR perf suite - C#"); SparkContext = new SparkContext(sparkConf); SqlContext = new SqlContext(PerfBenchmark.SparkContext); }
public void TestBroadcastVariablesInWorker() { Process worker; var CSharpRDD_SocketServer = CreateServer(out worker); string assertMessage; using (var serverSocket = CSharpRDD_SocketServer.Accept()) using (var s = serverSocket.GetStream()) { SerDe.Write(s, splitIndex); SerDe.Write(s, ver); SerDe.Write(s, 0); SerDe.Write(s, 0); SerDe.Write(s, 0); SerDe.Write(s, 0L); SerDe.Write(s, sparkFilesDir); SerDe.Write(s, numberOfIncludesItems); // broadcastVariablesToAdd and broadcastVariablesToDelete are used to trigger broadcast variable operations (register and remove) on the worker side; // after the worker exits, check whether the expected number of broadcast variables was processed. var broadcastVariablesToAdd = new long[] { 101L, 102L, 103L }; var broadcastVariablesToDelete = new long[] { 10L, 20L }; SerDe.Write(s, broadcastVariablesToAdd.Length + broadcastVariablesToDelete.Length); broadcastVariablesToAdd.ToList().ForEach(bid => { SerDe.Write(s, bid); SerDe.Write(s, "path" + bid); }); broadcastVariablesToDelete.ToList().ForEach(bid => SerDe.Write(s, -bid - 1)); SerDe.Write(s, 0); //flag for UDF byte[] command = SparkContext.BuildCommand(new CSharpWorkerFunc((pid, iter) => iter), SerializedMode.String, SerializedMode.String); SerDe.Write(s, command.Length); SerDe.Write(s, command); for (int i = 0; i < 100; i++) { SerDe.Write(s, i.ToString()); } SerDe.Write(s, (int)SpecialLengths.END_OF_DATA_SECTION); SerDe.Write(s, (int)SpecialLengths.END_OF_STREAM); s.Flush(); int count = 0; foreach (var bytes in ReadWorker(s)) { Assert.AreEqual(count++.ToString(), Encoding.UTF8.GetString(bytes)); } Assert.AreEqual(100, count); // TODO verification should not depend on the output of the worker // we postpone the check of assertMessage until after the worker exits assertMessage = "num_broadcast_variables: " + (broadcastVariablesToAdd.Length + broadcastVariablesToDelete.Length); } AssertWorker(worker, 0, assertMessage); CSharpRDD_SocketServer.Close(); }
internal RDD <T> Execute(double t) { var sc = SparkContext.GetActiveSparkContext(); int[] array = new int[numPartitions]; var initialRdd = sc.Parallelize(array.AsEnumerable(), numPartitions); return(initialRdd.MapPartitionsWithIndex <T>(new CSharpInputDStreamMapPartitionWithIndexHelper <T>(t, func).Execute, true)); }
/// <summary> /// Creates a SqlContext /// </summary> /// <param name="sparkContext"></param> public SqlContext(SparkContext sparkContext) { this.sparkContext = sparkContext; sqlContextProxy = sparkContext.SparkContextProxy.CreateSqlContext(); if (instance == null) { instance = this; } }
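// A short note on the design choice above: only the first SqlContext constructed becomes the
// cached singleton, so a context created explicitly via this constructor is the one GetOrCreate
// subsequently hands back. A minimal sketch, assuming an existing SparkContext named sparkContext:
var explicitContext = new SqlContext(sparkContext);
var shared = SqlContext.GetOrCreate(sparkContext);
// shared is the same instance as explicitContext, because the constructor
// registered it as the singleton when none existed yet.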
internal SqlContext(SparkContext sparkContext, ISqlContextProxy sqlContextProxy) { this.sparkContext = sparkContext; this.sqlContextProxy = sqlContextProxy; if (instance == null) { instance = this; } }
public void TestInitialize() { sc = new SparkContext(null); sc.StartAccumulatorServer(); // get the accumulator server port and connect to the accumulator server int serverPort = (sc.SparkContextProxy as MockSparkContextProxy).AccumulatorServerPort; sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); sock.Connect(IPAddress.Loopback, serverPort); }
static void Main(string[] args) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set Logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRSamples)); ProcessArugments(args); SparkContext = CreateSparkContext(); SparkContext.SetCheckpointDir(Path.GetTempPath()); RunSamples(); SparkContext.Stop(); }
public StreamingContextIpcProxy(SparkContext sparkContext, long durationMs) { this.sparkContext = sparkContext; sparkContextProxy = sparkContext.SparkContextProxy; var jduration = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { durationMs }); JvmObjectReference jvmSparkContextReference = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference; jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.StreamingContext", new object[] { jvmSparkContextReference, jduration }); jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { jvmStreamingContextReference }); }
static void Main(string[] args) { SparkContext = CreateSparkContext(); using (WebApp.Start<Startup>("http://localhost:9000/")) { Console.ReadLine(); } SparkContext.Stop(); }
public StreamingContextIpcProxy(string checkpointPath) { jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { checkpointPath }); jvmStreamingContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc")); JvmObjectReference jvmSparkContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc")); JvmObjectReference jvmSparkConfReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf")); JvmObjectReference jvmJavaContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext")); sparkContextProxy = new SparkContextIpcProxy(jvmSparkContextReference, jvmJavaContextReference); var sparkConfProxy = new SparkConfIpcProxy(jvmSparkConfReference); sparkContext = new SparkContext(sparkContextProxy, new SparkConf(sparkConfProxy)); }
public void TestRddUnion() { var sparkContext = new SparkContext(null); var rdd = sparkContext.TextFile(@"c:\path\to\rddinput.txt"); var rdd2 = sparkContext.TextFile(@"c:\path\to\rddinput2.txt"); var unionRdd = rdd.Union(rdd2); var paramValuesToUnionMethod = ((unionRdd.RddProxy as MockRddProxy).mockRddReference as object[]); var paramValuesToTextFileMethodInRdd1 = (paramValuesToUnionMethod[0] as MockRddProxy).mockRddReference as object[]; Assert.AreEqual(@"c:\path\to\rddinput.txt", paramValuesToTextFileMethodInRdd1[0]); var paramValuesToTextFileMethodInRdd2 = (paramValuesToUnionMethod[1] as MockRddProxy).mockRddReference as object[]; Assert.AreEqual(@"c:\path\to\rddinput2.txt", paramValuesToTextFileMethodInRdd2[0]); }
public StreamingContextIpcProxy(SparkContext sparkContext, long durationMs) { this.sparkContext = sparkContext; sparkContextProxy = sparkContext.SparkContextProxy; var jduration = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { durationMs }); JvmObjectReference jvmSparkContextReference = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference; jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.StreamingContext", new object[] { jvmSparkContextReference, jduration }); jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { jvmStreamingContextReference }); int port = StartCallback(); SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("SparkCLRHandler", "connectCallback", port); //className and methodName hardcoded in CSharpBackendHandler }
public StreamingContextIpcProxy(SparkContext sparkContext, int durationSeconds) { this.sparkContext = sparkContext; sparkContextProxy = sparkContext.SparkContextProxy; var jduration = JvmBridgeUtils.GetJavaDuration(durationSeconds); JvmObjectReference jvmSparkContextReference = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference; jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.StreamingContext", new object[] { jvmSparkContextReference, jduration }); jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { jvmStreamingContextReference }); StartAccumulatorServer(sparkContext); StartCallbackServer(); }
public void TestConstantInputDStream() { var sc = new SparkContext("", ""); var rdd = sc.Parallelize(Enumerable.Range(0, 10), 1); var ssc = new StreamingContext(sc, 1000L); // test when rdd is null Assert.Throws<ArgumentNullException>(() => new ConstantInputDStream<int>(null, ssc)); var constantInputDStream = new ConstantInputDStream<int>(rdd, ssc); Assert.IsNotNull(constantInputDStream); Assert.AreEqual(ssc, constantInputDStream.streamingContext); }
static void Main(string[] args) { var sparkContext = new SparkContext(new SparkConf().SetAppName("SparkCLREventHub Example")); var eventhubsParams = new Dictionary<string, string>() { {"eventhubs.policyname", "<policyname>"}, {"eventhubs.policykey", "<policykey>"}, {"eventhubs.namespace", "<namespace>"}, {"eventhubs.name", "<name>"}, {"eventhubs.partition.count", "<partitioncount>"}, {"eventhubs.consumergroup", "$default"}, {"eventhubs.checkpoint.dir", "<hdfs path to eventhub checkpoint dir>"}, {"eventhubs.checkpoint.interval", "<interval>"}, }; const int windowDurationInSecs = 5; const int slideDurationInSecs = 5; const string checkpointPath = "<hdfs path to spark checkpoint dir>"; //const string outputPath = "<hdfs path to output dir>"; const long slideDurationInMillis = 5000; StreamingContext sparkStreamingContext = StreamingContext.GetOrCreate(checkpointPath, () => { var ssc = new StreamingContext(sparkContext, slideDurationInMillis); ssc.Checkpoint(checkpointPath); var stream = EventHubsUtils.CreateUnionStream(ssc, eventhubsParams); var countByLogLevelAndTime = stream .Map(bytes => Encoding.UTF8.GetString(bytes)) .Filter(line => line.Contains(",")) .Map(line => line.Split(',')) .Map(columns => new KeyValuePair<string, int>(string.Format("{0},{1}", columns[0], columns[1]), 1)) .ReduceByKeyAndWindow((x, y) => x + y, (x, y) => x - y, windowDurationInSecs, slideDurationInSecs, 3) .Map(logLevelCountPair => string.Format("{0},{1}", logLevelCountPair.Key, logLevelCountPair.Value)); countByLogLevelAndTime.ForeachRDD(countByLogLevel => { //dimensionalCount.SaveAsTextFile(string.Format("{0}/{1}", outputPath, Guid.NewGuid())); var dimensionalCountCollection = countByLogLevel.Collect(); foreach (var dimensionalCountItem in dimensionalCountCollection) { Console.WriteLine(dimensionalCountItem); } }); return ssc; }); sparkStreamingContext.Start(); sparkStreamingContext.AwaitTermination(); }
public StreamingContextIpcProxy(string checkpointPath) { jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { checkpointPath }); jvmStreamingContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc")); JvmObjectReference jvmSparkContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc")); JvmObjectReference jvmSparkConfReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf")); JvmObjectReference jvmJavaContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext")); sparkContextProxy = new SparkContextIpcProxy(jvmSparkContextReference, jvmJavaContextReference); var sparkConfProxy = new SparkConfIpcProxy(jvmSparkConfReference); sparkContext = new SparkContext(sparkContextProxy, new SparkConf(sparkConfProxy)); // TODO: We don't know whether an accumulator variable was used before restart, so we start the accumulator server to be safe. sparkContext.StartAccumulatorServer(); }
static void Main(string[] args) { var sparkContext = new SparkContext(new SparkConf().SetAppName("SparkCLRKafka Example")); const string topicName = "<topicName>"; var topicList = new List<string> {topicName}; var kafkaParams = new Dictionary<string, string> //refer to http://kafka.apache.org/documentation.html#configuration { {"metadata.broker.list", "<kafka brokers list>"}, {"auto.offset.reset", "smallest"} }; var perTopicPartitionKafkaOffsets = new Dictionary<string, long>(); const int windowDurationInSecs = 5; const int slideDurationInSecs = 5; const string checkpointPath = "<hdfs path to spark checkpoint directory>"; const string appOutputPath = "<hdfs path to app output directory>"; const long slideDurationInMillis = 5000; StreamingContext sparkStreamingContext = StreamingContext.GetOrCreate(checkpointPath, () => { var ssc = new StreamingContext(sparkContext, slideDurationInMillis); ssc.Checkpoint(checkpointPath); var stream = KafkaUtils.CreateDirectStream(ssc, topicList, kafkaParams, perTopicPartitionKafkaOffsets); var countByLogLevelAndTime = stream .Map(kvp => Encoding.UTF8.GetString(kvp.Value)) .Filter(line => line.Contains(",")) .Map(line => line.Split(',')) .Map(columns => new KeyValuePair<string, int>(string.Format("{0},{1}", columns[0], columns[1]), 1)) .ReduceByKeyAndWindow((x, y) => x + y, (x, y) => x - y, windowDurationInSecs, slideDurationInSecs, 3) .Map(logLevelCountPair => string.Format("{0},{1}", logLevelCountPair.Key, logLevelCountPair.Value)); countByLogLevelAndTime.ForeachRDD(countByLogLevel => { countByLogLevel.SaveAsTextFile(string.Format("{0}/{1}", appOutputPath, Guid.NewGuid())); foreach (var logCount in countByLogLevel.Collect()) { Console.WriteLine(logCount); } }); return ssc; }); sparkStreamingContext.Start(); sparkStreamingContext.AwaitTermination(); }
public void TestAccumulatorSuccess() { var sc = new SparkContext(null); Accumulator<int> accumulator = sc.Accumulator<int>(0); // get the accumulator server port and connect to the accumulator server int serverPort = (sc.SparkContextProxy as MockSparkContextProxy).AccumulatorServerPort; var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); sock.Connect(IPAddress.Loopback, serverPort); using (var s = new NetworkStream(sock)) { // write numUpdates int numUpdates = 1; SerDe.Write(s, numUpdates); // write update int key = 0; int value = 100; KeyValuePair<int, dynamic> update = new KeyValuePair<int, dynamic>(key, value); var ms = new MemoryStream(); var formatter = new BinaryFormatter(); formatter.Serialize(ms, update); byte[] sendBuffer = ms.ToArray(); SerDe.Write(s, sendBuffer.Length); SerDe.Write(s, sendBuffer); s.Flush(); byte[] receiveBuffer = new byte[1]; s.Read(receiveBuffer, 0, 1); Assert.AreEqual(accumulator.Value, value); // try to let the service side close gracefully sc.Stop(); try { numUpdates = 0; SerDe.Write(s, numUpdates); } catch { // do nothing here } } sock.Close(); }
static void Main(string[] args) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set var logger = LoggerServiceFactory.GetLogger(typeof(JdbcDataFrameExample)); var sparkConf = new SparkConf(); var sparkContext = new SparkContext(sparkConf); var sqlContext = new SqlContext(sparkContext); var df = sqlContext.Read() .Jdbc("jdbc:sqlserver://localhost:1433;databaseName=Temp;integratedSecurity=true;", "xyz", new Dictionary<string, string>()); df.ShowSchema(); var rowCount = df.Count(); logger.LogInfo("Row count is " + rowCount); }
public RoslynScriptEngine(SparkContext sc, SqlContext sqlContext) { this.sc = sc; sparkConf = sc.GetConf(); host = new SparkCLRHost { sc = sc, sqlContext = sqlContext }; var sparkLocalDir = sparkConf.Get("spark.local.dir", Path.GetTempPath()); compilationDumpDirectory = Path.Combine(sparkLocalDir, Path.GetRandomFileName()); Directory.CreateDirectory(compilationDumpDirectory); options = new CSharpParseOptions(LanguageVersion.CSharp6, DocumentationMode.Parse, SourceCodeKind.Script); }
static void Main(string[] args) { if (args.Length < 2) { Console.WriteLine("Usage: HdfsWordCount <checkpointDirectory> <inputDirectory>"); return; } string checkpointPath = args[0]; string inputDir = args[1]; StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath, () => { var sparkConf = new SparkConf(); sparkConf.SetAppName("HdfsWordCount"); var sc = new SparkContext(sparkConf); StreamingContext context = new StreamingContext(sc, 30000); context.Checkpoint(checkpointPath); var lines = context.TextFileStream(inputDir); var words = lines.FlatMap(l => l.Split(' ')); var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1)); var wordCounts = pairs.ReduceByKey((x, y) => x + y); wordCounts.ForeachRDD((time, rdd) => { Console.WriteLine("-------------------------------------------"); Console.WriteLine("Time: {0}", time); Console.WriteLine("-------------------------------------------"); object[] taken = rdd.Take(10); foreach (object record in taken) { Console.WriteLine(record); } Console.WriteLine(); }); return context; }); ssc.Start(); ssc.AwaitTermination(); ssc.Stop(); }