/// <summary>
/// Main method which is to be the starting point: wires up logging, locates the
/// Spark binaries under the studio install, starts the CSharpRunner and runs the
/// WordCount sample.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance);
    logger = LoggerServiceFactory.GetLogger(typeof(WordCountExample));
    try
    {
        // Resolve the studio install root relative to the working directory,
        // then build paths with Path.Combine instead of string concatenation.
        string studioHome = Directory.GetParent("..\\..\\..\\..\\..\\..\\..\\..\\..\\").ToString();
        string sparkHome = System.IO.Path.Combine(studioHome, @"BigDataSDK\SDK\Spark\bin");
        string appConfigFile = System.IO.Path.Combine(Directory.GetParent("..\\..\\").ToString(), "App.config");

        cSharpRunner = new CSharpRunner(sparkHome, appConfigFile);
        cSharpRunner.UpdateConfigFile(configFile);

        // Starting CSharpRunner is essential to execute a C# Spark sample
        StartCSharpRunner();
        if (cSharpRunner.IsCSharpRunnerStarted)
        {
            logger.LogInfo("CSharpRunner Started.................");
            WordCount();
            DisplayEndInfo();
            sparkContext.Stop();
            cSharpRunner.process.Kill();
        }
        else
        {
            DisplayEndInfo();
        }
    }
    catch (Exception ex)
    {
        // Log the full exception (message + stack trace), not just ex.Message,
        // so failures can be diagnosed from the log alone.
        logger.LogError(ex.ToString());
    }
}
/// <summary>
/// Entry point for the Pi example: builds a list of indices, parallelizes it
/// across a fixed number of slices, and estimates Pi via two approaches.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    Logger = LoggerServiceFactory.GetLogger(typeof(PiExample));

    var sparkContext = new SparkContext(new SparkConf());

    try
    {
        const int slices = 3;
        var numberOfItems = (int)Math.Min(100000L * slices, int.MaxValue);

        // Fill the list with exactly numberOfItems values. The original loop
        // used "i <= numberOfItems", adding one element beyond the declared
        // capacity (off-by-one) and one more sample than intended.
        var values = new List<int>(numberOfItems);
        for (var i = 0; i < numberOfItems; i++)
        {
            values.Add(i);
        }

        var rdd = sparkContext.Parallelize(values, slices);
        CalculatePiUsingAnonymousMethod(numberOfItems, rdd);
        CalculatePiUsingSerializedClassApproach(numberOfItems, rdd);
        Logger.LogInfo("Completed calculating the value of Pi");
    }
    catch (Exception ex)
    {
        Logger.LogError("Error calculating Pi");
        Logger.LogException(ex);
    }

    sparkContext.Stop();
}
/// <summary>
/// Samples driver: runs the sample suite either as a dry run (no cluster) or
/// against a real SparkContext, and exits non-zero on validation failure.
/// </summary>
static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    Logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRSamples));

    Configuration = CommandlineArgumentProcessor.ProcessArugments(args);
    PrintLogLocation();

    bool status;
    if (Configuration.IsDryrun)
    {
        // Dry runs never touch a cluster, so no SparkContext is created.
        status = SamplesRunner.RunSamples();
    }
    else
    {
        SparkContext = CreateSparkContext();
        SparkContext.SetCheckpointDir(Path.GetTempPath());
        status = SamplesRunner.RunSamples();
        PrintLogLocation();
        ConsoleWriteLine("Completed running samples. Calling SparkContext.Stop() to tear down ...");
        //following comment is necessary due to known issue in Spark. See https://issues.apache.org/jira/browse/SPARK-8333
        ConsoleWriteLine("If this program (SparkCLRSamples.exe) does not terminate in 10 seconds, please manually terminate java process launched by this program!!!"); //TODO - add instructions to terminate java process
        SparkContext.Stop();
    }

    if (Configuration.IsValidationEnabled && !status)
    {
        Environment.Exit(1);
    }
}
/// <summary>
/// Entry point for the Pi example: parallelizes a list of indices and
/// estimates Pi using both an anonymous-method and a serialized-class approach.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    Logger = LoggerServiceFactory.GetLogger(typeof(PiExample));

    var sparkContext = new SparkContext(new SparkConf());

    try
    {
        const int slices = 3;
        var numberOfItems = (int)Math.Min(100000L * slices, int.MaxValue);

        // Generate exactly numberOfItems values. The original condition
        // "i <= numberOfItems" produced one element more than the list's
        // declared capacity (off-by-one).
        var values = new List<int>(numberOfItems);
        for (var i = 0; i < numberOfItems; i++)
        {
            values.Add(i);
        }

        var rdd = sparkContext.Parallelize(values, slices);
        CalculatePiUsingAnonymousMethod(numberOfItems, rdd);
        CalculatePiUsingSerializedClassApproach(numberOfItems, rdd);
        Logger.LogInfo("Completed calculating the value of Pi");
    }
    catch (Exception ex)
    {
        Logger.LogError("Error calculating Pi");
        Logger.LogException(ex);
    }

    sparkContext.Stop();
}
/// <summary>
/// Reads an XML file into a DataFrame via spark-xml, logs its schema and row
/// count, then writes a projection back out as XML.
/// </summary>
/// <param name="args">[0] input XML path, [1] output XML path.</param>
static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    var logger = LoggerServiceFactory.GetLogger(typeof(SparkXmlExample));

    // Guard against missing arguments instead of failing later with an
    // IndexOutOfRangeException on args[0]/args[1].
    if (args.Length != 2)
    {
        Console.Error.WriteLine("Usage: SparkXmlExample <inputXmlFilePath> <outputXmlFilePath>");
        return;
    }

    var inputXmlFilePath = args[0];   //"D:\temp\books.xml", "file:/D:/temp/books.xml" or "hdfs://temp/books.xml"
    var outputXmlFilePath = args[1];  //"D:\temp\booksUpdated.xml", "file:/D:/temp/booksUpdated.xml" or "hdfs://temp/booksUpdated.xml"

    var sparkConf = new SparkConf();
    sparkConf.SetAppName("myapp");
    var sparkContext = new SparkContext(sparkConf);
    var sqlContext = new SqlContext(sparkContext);

    // Each <book> element becomes one row.
    var df = sqlContext.Read()
        .Format("com.databricks.spark.xml")
        .Option("rowTag", "book")
        .Load(inputXmlFilePath);
    df.ShowSchema();
    var rowCount = df.Count();
    logger.LogInfo("Row count is " + rowCount);

    // "@id" selects the id attribute of the book element.
    var selectedData = df.Select("author", "@id");
    selectedData.Write()
        .Format("com.databricks.spark.xml")
        .Option("rootTag", "books")
        .Option("rowTag", "book")
        .Save(outputXmlFilePath);

    sparkContext.Stop();
}
public void TestSparkContextStop()
{
    // A freshly created context holds a live reference in its mock proxy;
    // Stop() must clear that reference.
    var context = new SparkContext(null);
    var proxy = context.SparkContextProxy as MockSparkContextProxy;
    Assert.IsNotNull(proxy.mockSparkContextReference);

    context.Stop();

    proxy = context.SparkContextProxy as MockSparkContextProxy;
    Assert.IsNull(proxy.mockSparkContextReference);
}
public void TestSparkContextProxy()
{
    // Exercise every pass-through API on SparkContext against the mock
    // proxy; null arguments are accepted, so this only verifies wiring.
    var sc = new SparkContext("masterUrl", "appName");

    sc.AddFile(null);
    sc.BinaryFiles(null, null);
    sc.CancelAllJobs();
    sc.CancelJobGroup(null);
    sc.EmptyRDD<string>();
    sc.GetLocalProperty(null);
    sc.HadoopFile(null, null, null, null);
    sc.HadoopRDD(null, null, null);
    sc.NewAPIHadoopFile(null, null, null, null);
    sc.NewAPIHadoopRDD(null, null, null);
    sc.Parallelize<int>(new int[] { 1, 2, 3, 4, 5 });
    sc.SequenceFile(null, null, null, null, null, null);
    sc.SetCheckpointDir(null);
    sc.SetJobGroup(null, null);
    sc.SetLocalProperty(null, null);
    sc.SetLogLevel(null);
    sc.TextFile(null);
    sc.WholeTextFiles(null);
    sc.Stop();
    sc.Union<string>(null);
}
/// <summary>
/// Word-count entry point: counts word occurrences in the file named by the
/// single command-line argument and prints each word with its count.
/// </summary>
/// <returns>0 on success (including handled errors), 1 on bad usage.</returns>
public static int Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    Logger = LoggerServiceFactory.GetLogger(typeof(WordCountExample));

    if (args.Length != 1)
    {
        Console.Error.WriteLine("Usage: WordCount <file>");
        return 1;
    }

    var sparkContext = new SparkContext(new SparkConf().SetAppName("MobiusWordCount"));
    try
    {
        // Classic word count: split on spaces, pair each word with 1,
        // then sum the counts per word.
        var wordCounts = sparkContext.TextFile(args[0])
            .FlatMap(line => line.Split(' '))
            .Map(word => new KeyValuePair<string, int>(word, 1))
            .ReduceByKey((a, b) => a + b);

        foreach (var pair in wordCounts.Collect())
        {
            Console.WriteLine("{0}: {1}", pair.Key, pair.Value);
        }
    }
    catch (Exception ex)
    {
        Logger.LogError("Error performing Word Count");
        Logger.LogException(ex);
    }

    sparkContext.Stop();
    return 0;
}
/// <summary>
/// Counts word occurrences in the input file given as the only argument and
/// writes "word: count" lines to the console.
/// </summary>
/// <returns>1 when the argument is missing; otherwise 0.</returns>
public static int Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    Logger = LoggerServiceFactory.GetLogger(typeof(WordCountExample));

    // Exactly one argument - the input file - is required.
    if (args.Length != 1)
    {
        Console.Error.WriteLine("Usage: WordCount <file>");
        return 1;
    }

    var context = new SparkContext(new SparkConf().SetAppName("MobiusWordCount"));
    try
    {
        var textLines = context.TextFile(args[0]);
        var perWordTotals = textLines
            .FlatMap(line => line.Split(' '))
            .Map(token => new KeyValuePair<string, int>(token, 1))
            .ReduceByKey((left, right) => left + right);

        foreach (var entry in perWordTotals.Collect())
        {
            Console.WriteLine("{0}: {1}", entry.Key, entry.Value);
        }
    }
    catch (Exception ex)
    {
        Logger.LogError("Error performing Word Count");
        Logger.LogException(ex);
    }

    context.Stop();
    return 0;
}
/// <summary>
/// Samples driver: configures logging, creates the SparkContext, runs the
/// samples, and tears the context down.
/// </summary>
static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    Logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRSamples));

    ProcessArugments(args);

    SparkContext = CreateSparkContext();
    // Checkpointing goes to the system temp directory.
    SparkContext.SetCheckpointDir(Path.GetTempPath());
    RunSamples();
    SparkContext.Stop();
}
/// <summary>
/// Hosts the OWIN web app on localhost:9000 until the user presses Enter,
/// then stops the SparkContext.
/// </summary>
static void Main(string[] args)
{
    SparkContext = CreateSparkContext();

    const string baseAddress = "http://localhost:9000/";
    using (WebApp.Start<Startup>(baseAddress))
    {
        // Block until the operator presses Enter; disposing the host
        // shuts the web server down.
        Console.ReadLine();
    }

    SparkContext.Stop();
}
public void TestAccumuatorSuccess()
{
    // End-to-end check of the accumulator update protocol: connect to the
    // mock accumulator server, send one serialized (key, value) update, and
    // verify the accumulator reflects the value. Statement order matters:
    // the server must acknowledge before the assertion runs.
    var sc = new SparkContext(null);
    Accumulator<int> accumulator = sc.Accumulator<int>(0);

    // get accumulator server port and connect to accumuator server
    int serverPort = (sc.SparkContextProxy as MockSparkContextProxy).AccumulatorServerPort;
    var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
    sock.Connect(IPAddress.Loopback, serverPort);

    using (var s = new NetworkStream(sock))
    {
        // write numUpdates
        int numUpdates = 1;
        SerDe.Write(s, numUpdates);

        // write update: a single binary-serialized, length-prefixed
        // KeyValuePair as the server's wire format expects.
        int key = 0;
        int value = 100;
        KeyValuePair<int, dynamic> update = new KeyValuePair<int, dynamic>(key, value);
        var ms = new MemoryStream();
        var formatter = new BinaryFormatter();
        formatter.Serialize(ms, update);
        byte[] sendBuffer = ms.ToArray();
        SerDe.Write(s, sendBuffer.Length);
        SerDe.Write(s, sendBuffer);
        s.Flush();

        // Block on the server's one-byte acknowledgement so the update has
        // been applied before asserting.
        byte[] receiveBuffer = new byte[1];
        s.Read(receiveBuffer, 0, 1);

        Assert.AreEqual(accumulator.Value, value);

        // try to let service side to close gracefully
        sc.Stop();

        try
        {
            numUpdates = 0;
            SerDe.Write(s, numUpdates);
        }
        catch
        {
            // do nothing here
        }
    }
    sock.Close();
}
static void Main(string[] args)
{
    // Diagnostic word-count sample: echoes runtime/environment info, treats
    // each command-line argument as a "word", and reduces duplicate
    // arguments to occurrence counts via Spark.
    Console.WriteLine("sizeof(int) = " + sizeof(int)
        + ", sizeof(long) = " + sizeof(long)
        + ", Is64BitOperatingSystem = " + Environment.Is64BitOperatingSystem
        + ", Is64BitProcess = " + Environment.Is64BitProcess
        + ", OSVersion = " + Environment.OSVersion
        + ", MachineName = " + Environment.MachineName);

    var exe = Path.GetFileName(System.Reflection.Assembly.GetExecutingAssembly().CodeBase);
    if (args.Length < 1 || args[0] == "-h" || args[0] == "--help")
    {
        // No input (or help requested): print usage examples and exit.
        Console.WriteLine("Usage : {0} input-arguments", exe);
        Console.WriteLine("Example-1: {0} any-thing that you-want-to-write=input", exe);
        // Platform-specific shell variable that expands to the current
        // directory, used to build a realistic Example-2 command line.
        var mapCurrentDir = new Dictionary<PlatformID, string>
        {
            { PlatformID.Win32NT, "%CD%" },
            { PlatformID.Win32S, "%CD%" },
            { PlatformID.Win32Windows, "%CD%" },
            { PlatformID.WinCE, "%CD%" },
            { PlatformID.Unix, "$PWD" }
        };
        var currentDirectory = string.Empty;
        if (mapCurrentDir.TryGetValue(Environment.OSVersion.Platform, out currentDirectory))
        {
            Console.WriteLine(@"Example-2: {0} {1} arg2@*#:,+.-\/~ Pi* d:\tmp {2}", exe, currentDirectory, "\"jdbc:mysql://localhost:3306/lzdb?user=guest&password=abc123\"");
        }
        return;
    }

    // Log each argument with its 1-based position; idx++ inside the lambda
    // is evaluated as string.Join enumerates the projection.
    var idx = 0;
    Log("args.Length = " + args.Length + Environment.NewLine
        + string.Join(Environment.NewLine, args.Select(arg =>
        {
            idx++;
            return ("args[" + idx + "] = " + arg);
        }))
        );

    // Pair every argument with the count 1; duplicates are summed below.
    var singleValueRDD = new List<KeyValuePair<string, int>>(
        args.Select(arg => new KeyValuePair<string, int>(arg, 1))
        );
    idx = 0;
    singleValueRDD.ForEach(kv => Log(string.Format("src-pair[{0}] : {1} = {2}", idx++, kv.Key, kv.Value)));

    var sparkContext = new SparkContext(new SparkConf());
    var rdd = sparkContext.Parallelize(singleValueRDD);
    Log(string.Format("Main() rdd = {0}", rdd));

    // Sum the counts for identical argument strings and report how many
    // distinct arguments remain.
    var reduced = rdd.ReduceByKey((v1, v2) => v1 + v2);
    Log("reduced.count = " + reduced.Count());

    sparkContext.Stop();
}
public void TestAccumuatorSuccess()
{
    // End-to-end check of the accumulator update protocol: connect to the
    // mock accumulator server, send one serialized (key, value) update, and
    // verify the accumulator reflects the value. The exact statement order
    // (write, flush, read ack, assert) is the protocol handshake.
    var sc = new SparkContext(null);
    Accumulator<int> accumulator = sc.Accumulator<int>(0);

    // get accumulator server port and connect to accumuator server
    int serverPort = (sc.SparkContextProxy as MockSparkContextProxy).AccumulatorServerPort;
    var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
    sock.Connect(IPAddress.Loopback, serverPort);

    using (var s = new NetworkStream(sock))
    {
        // write numUpdates
        int numUpdates = 1;
        SerDe.Write(s, numUpdates);

        // write update: a single binary-serialized, length-prefixed
        // KeyValuePair as the server's wire format expects.
        int key = 0;
        int value = 100;
        KeyValuePair<int, dynamic> update = new KeyValuePair<int, dynamic>(key, value);
        var ms = new MemoryStream();
        var formatter = new BinaryFormatter();
        formatter.Serialize(ms, update);
        byte[] sendBuffer = ms.ToArray();
        SerDe.Write(s, sendBuffer.Length);
        SerDe.Write(s, sendBuffer);
        s.Flush();

        // Block on the server's one-byte acknowledgement so the update has
        // been applied before asserting.
        byte[] receiveBuffer = new byte[1];
        s.Read(receiveBuffer, 0, 1);

        Assert.AreEqual(accumulator.Value, value);

        // try to let service side to close gracefully
        sc.Stop();

        try
        {
            numUpdates = 0;
            SerDe.Write(s, numUpdates);
        }
        catch
        {
            // do nothing here
        }
    }
    sock.Close();
}
/// <summary>
/// Times a single read-and-count pass over the text files matching
/// <paramref name="pathPattern"/>, logging begin/end markers and duration.
/// </summary>
/// <param name="pathPattern">Path or glob of the text files to read.</param>
/// <param name="times">Current iteration number (for log correlation).</param>
/// <param name="totalTimes">Total planned iterations (for log correlation).</param>
static void StartOneTest(string pathPattern, int times, int totalTimes)
{
    var beginTime = DateTime.Now;
    Logger.LogInfo($"Begin test[{times}]-{totalTimes} , will read : {pathPattern} . {GetCurrentProcessInfo()}");

    var sc = new SparkContext(new SparkConf());
    // Cache so the identity Map is materialized once, then release it.
    var cachedLines = sc.TextFile(pathPattern).Map<string>(line => line).Cache();
    Logger.LogInfo("RDD count = {0}", cachedLines.Count());
    cachedLines.Unpersist();

    var endTime = DateTime.Now;
    Logger.LogInfo($"End test[{times}]-{totalTimes} of {typeof(TxtStreamTest)}, used time = {(endTime - beginTime).TotalSeconds} s = {endTime - beginTime} . read = {pathPattern} ; {GetCurrentProcessInfo()}");
    sc.Stop();
}
public void TestCleanUp()
{
    sc.Stop();
    try
    {
        // Send a zero-update message so the accumulator server side can
        // shut down gracefully.
        using (var s = new NetworkStream(sock))
        {
            int numUpdates = 0;
            SerDe.Write(s, numUpdates);
        }
    }
    catch
    {
        // Best effort only - the server may already be gone.
    }
    finally
    {
        // Always close the socket, even when the write throws. The original
        // called Close() inside the try, leaking the socket on exception.
        sock.Close();
    }
}
/// <summary>
/// Samples driver: runs either as a dry run (no cluster) or with a real
/// SparkContext, then waits for a key press before exiting.
/// </summary>
static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    Logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRSamples));

    ProcessArugments(args);

    if (Configuration.IsDryrun)
    {
        // Dry runs skip SparkContext creation entirely.
        RunSamples();
    }
    else
    {
        SparkContext = CreateSparkContext();
        SparkContext.SetCheckpointDir(Path.GetTempPath());
        RunSamples();
        SparkContext.Stop();
    }

    // Keep the console window open until the operator presses Enter.
    System.Console.ReadLine();
}
public void TestCleanUp()
{
    sc.Stop();
    try
    {
        // Tell the accumulator server there are no more updates so it can
        // shut down gracefully.
        using (var stream = sock.GetStream())
        {
            int noMoreUpdates = 0;
            SerDe.Write(stream, noMoreUpdates);
        }
    }
    catch
    {
        // Best effort only - the server side may already be gone.
    }
    finally
    {
        sock.Close();
    }
}
/// <summary>
/// Loads a database table over JDBC into a DataFrame, then logs its schema
/// and row count.
/// </summary>
/// <param name="args">[0] JDBC connection string, [1] table name.</param>
static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    var logger = LoggerServiceFactory.GetLogger(typeof(JdbcDataFrameExample));

    // Guard against missing arguments instead of failing later with an
    // IndexOutOfRangeException on args[0]/args[1].
    if (args.Length != 2)
    {
        Console.Error.WriteLine("Usage: JdbcDataFrameExample <connectionString> <tableName>");
        return;
    }

    //For SQL Server use the connection string formats below
    //"jdbc:sqlserver://localhost:1433;databaseName=Temp;integratedSecurity=true;" or
    //"jdbc:sqlserver://localhost;databaseName=Temp;user=MyUserName;password=myPassword;"
    var connectionString = args[0];
    var tableName = args[1];

    var sparkConf = new SparkConf();
    var sparkContext = new SparkContext(sparkConf);
    var sqlContext = new SqlContext(sparkContext);

    // No extra JDBC connection properties beyond the connection string.
    var df = sqlContext
        .Read()
        .Jdbc(connectionString, tableName, new Dictionary<string, string>());
    df.ShowSchema();
    var rowCount = df.Count();
    logger.LogInfo("Row count is " + rowCount);

    sparkContext.Stop();
}
/// <summary>
/// Samples driver: dry runs skip SparkContext creation; real runs create the
/// context, run the samples, and tear the context down.
/// </summary>
static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    Logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRSamples));

    ProcessArugments(args);
    PrintLogLocation();

    if (Configuration.IsDryrun)
    {
        RunSamples();
    }
    else
    {
        SparkContext = CreateSparkContext();
        SparkContext.SetCheckpointDir(Path.GetTempPath());
        RunSamples();
        PrintLogLocation();
        ConsoleWriteLine("Main", "Completed RunSamples. Calling SparkContext.Stop() to tear down ...");
        ConsoleWriteLine("Main", "If the program does not terminate in 10 seconds, please manually terminate java process !!!");
        SparkContext.Stop();
    }
}
/// <summary>
/// Runs the Pi example under a stopwatch, logs success or failure with the
/// elapsed duration, and exits non-zero on failure.
/// </summary>
private static void Main(string[] args)
{
    var success = true;
    SparkContext = CreateSparkContext();
    SparkContext.SetCheckpointDir(Path.GetTempPath());

    var stopWatch = Stopwatch.StartNew();
    var clockStart = stopWatch.Elapsed;
    try
    {
        Logger.Info("----- Running Pi example -----");
        Pi();
        var elapsed = stopWatch.Elapsed - clockStart;
        Logger.InfoFormat("----- Successfully finished running Pi example (duration={0}) -----", elapsed);
    }
    catch (Exception ex)
    {
        success = false;
        var elapsed = stopWatch.Elapsed - clockStart;
        Logger.InfoFormat("----- Error running Pi example (duration={0}) -----{1}{2}", elapsed, Environment.NewLine, ex);
    }

    Logger.Info("Completed running examples. Calling SparkContext.Stop() to tear down ...");
    // following comment is necessary due to known issue in Spark. See https://issues.apache.org/jira/browse/SPARK-8333
    Logger.Info("If this program (SparkCLRExamples.exe) does not terminate in 10 seconds, please manually terminate java process launched by this program!!!");
    SparkContext.Stop();

    if (!success)
    {
        Environment.Exit(1);
    }
}
/// <summary>
/// Loads a table over JDBC into a DataFrame and logs its schema and row count.
/// </summary>
/// <param name="args">[0] JDBC connection string, [1] table name.</param>
static void Main(string[] args)
{
    LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); //this is optional - DefaultLoggerService will be used if not set
    var logger = LoggerServiceFactory.GetLogger(typeof(JdbcDataFrameExample));

    //For SQL Server use the connection string formats below
    //"jdbc:sqlserver://localhost:1433;databaseName=Temp;integratedSecurity=true;" or
    //"jdbc:sqlserver://localhost;databaseName=Temp;user=MyUserName;password=myPassword;"
    var connectionString = args[0];
    var tableName = args[1];

    var sparkContext = new SparkContext(new SparkConf());
    var sqlContext = new SqlContext(sparkContext);

    // Load the table with no extra JDBC connection properties.
    var dataFrame = sqlContext.Read().Jdbc(connectionString, tableName, new Dictionary<string, string>());
    dataFrame.ShowSchema();
    var rowCount = dataFrame.Count();
    logger.LogInfo("Row count is " + rowCount);

    sparkContext.Stop();
}
public void TestSparkContextProxy()
{
    // Smoke-test every proxy-backed SparkContext API with trivially
    // acceptable (mostly null) arguments.
    var sparkContext = new SparkContext("masterUrl", "appName");

    // File/RDD creation APIs.
    sparkContext.AddFile(null);
    sparkContext.BinaryFiles(null, null);
    sparkContext.CancelAllJobs();
    sparkContext.CancelJobGroup(null);
    sparkContext.EmptyRDD<string>();
    sparkContext.GetLocalProperty(null);
    sparkContext.HadoopFile(null, null, null, null);
    sparkContext.HadoopRDD(null, null, null);
    sparkContext.NewAPIHadoopFile(null, null, null, null);
    sparkContext.NewAPIHadoopRDD(null, null, null);
    sparkContext.Parallelize<int>(new int[] { 1, 2, 3, 4, 5 });
    sparkContext.SequenceFile(null, null, null, null, null, null);

    // Configuration/property APIs.
    sparkContext.SetCheckpointDir(null);
    sparkContext.SetJobGroup(null, null);
    sparkContext.SetLocalProperty(null, null);
    sparkContext.SetLogLevel(null);
    sparkContext.TextFile(null);
    sparkContext.WholeTextFiles(null);

    sparkContext.Stop();
    sparkContext.Union<string>(null);
}
/// <summary>
/// Tears down the shared SparkContext.
/// </summary>
private static void StopSparkContext()
{
    // Thin wrapper kept for call-site readability.
    SparkContext.Stop();
}