Example #1
 public void TestRddUnion()
 {
     var sparkContext = new SparkContext(null);
     var rdd = sparkContext.TextFile(@"c:\path\to\rddinput.txt");
     var rdd2 = sparkContext.TextFile(@"c:\path\to\rddinput2.txt");
     var unionRdd = rdd.Union(rdd2);
     var paramValuesToUnionMethod = ((unionRdd.RddProxy as MockRddProxy).mockRddReference as object[]);
     var paramValuesToTextFileMethodInRdd1 = (paramValuesToUnionMethod[0] as MockRddProxy).mockRddReference as object[];
     Assert.AreEqual(@"c:\path\to\rddinput.txt", paramValuesToTextFileMethodInRdd1[0]);
     var paramValuesToTextFileMethodInRdd2 = (paramValuesToUnionMethod[1] as MockRddProxy).mockRddReference as object[];
     Assert.AreEqual(@"c:\path\to\rddinput2.txt", paramValuesToTextFileMethodInRdd2[0]);
 }
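A note on the harness these tests rely on: in these unit tests SparkContext is backed by mock proxies, and each RDD operation records the parameters it was called with in mockRddReference. A minimal sketch of that idea, assuming a hypothetical simplified shape (the real MockRddProxy in the Mobius test project implements the full RDD proxy interface):

 // Hypothetical sketch only - not the actual Mobius implementation.
 internal class MockRddProxy
 {
     // Parameters (or parent proxies) of the call that produced this RDD.
     internal readonly object mockRddReference;

     internal MockRddProxy(params object[] parameters)
     {
         mockRddReference = parameters;
     }

     // Union records its two parents, which is what TestRddUnion unwraps and asserts on.
     internal MockRddProxy Union(MockRddProxy other)
     {
         return new MockRddProxy(this, other);
     }
 }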
Example #2
        public void TestRddUnion()
        {
            var sparkContext                      = new SparkContext(null);
            var rdd                               = sparkContext.TextFile(@"c:\path\to\rddinput.txt");
            var rdd2                              = sparkContext.TextFile(@"c:\path\to\rddinput2.txt");
            var unionRdd                          = rdd.Union(rdd2);
            var paramValuesToUnionMethod          = ((unionRdd.RddProxy as MockRddProxy).mockRddReference as object[]);
            var paramValuesToTextFileMethodInRdd1 = (paramValuesToUnionMethod[0] as MockRddProxy).mockRddReference as object[];

            Assert.AreEqual(@"c:\path\to\rddinput.txt", paramValuesToTextFileMethodInRdd1[0]);
            var paramValuesToTextFileMethodInRdd2 = (paramValuesToUnionMethod[1] as MockRddProxy).mockRddReference as object[];

            Assert.AreEqual(@"c:\path\to\rddinput2.txt", paramValuesToTextFileMethodInRdd2[0]);
        }
Example #3
File: Program.cs Project: valmac/Mobius
        public static int Main(string[] args)
        {
            LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); // this is optional - DefaultLoggerService will be used if not set
            Logger = LoggerServiceFactory.GetLogger(typeof(WordCountExample));

            if (args.Length != 1)
            {
                Console.Error.WriteLine("Usage: WordCount  <file>");
                return(1);
            }

            var sparkContext = new SparkContext(new SparkConf().SetAppName("MobiusWordCount"));

            try
            {
                var lines  = sparkContext.TextFile(args[0]);
                var counts = lines
                             .FlatMap(x => x.Split(' '))
                             .Map(w => new KeyValuePair<string, int>(w, 1))
                             .ReduceByKey((x, y) => x + y);

                foreach (var wordcount in counts.Collect())
                {
                    Console.WriteLine("{0}: {1}", wordcount.Key, wordcount.Value);
                }
            }
            catch (Exception ex)
            {
                Logger.LogError("Error performing Word Count");
                Logger.LogException(ex);
            }

            sparkContext.Stop();
            return 0;
        }
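The FlatMap/Map/ReduceByKey chain above has a direct local analogue. A minimal LINQ sketch (illustration only, no Spark involved) of the same word-count semantics:

        // Local LINQ equivalent of the distributed pipeline above (illustration only).
        using System.Linq;

        public static class LocalWordCount
        {
            public static void Main()
            {
                string[] lines = { "the quick brown fox", "the quick dog" };
                var counts = lines
                             .SelectMany(x => x.Split(' '))   // FlatMap
                             .GroupBy(w => w)                 // group by key
                             .Select(g => new { Word = g.Key, Count = g.Count() }); // ReduceByKey((x, y) => x + y)

                foreach (var wc in counts)
                {
                    System.Console.WriteLine("{0}: {1}", wc.Word, wc.Count);
                }
            }
        }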
Example #4
 public static void Initialize(TestContext context)
 {
     var sparkContext = new SparkContext(null);
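     // Path.GetTempFileName() only supplies a real file path; with the mocked context the file contents are never read.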
     var lines = sparkContext.TextFile(Path.GetTempFileName());
     var words = lines.FlatMap(l => l.Split(' '));
     doubles = words.Map(w => new KeyValuePair<string, int>(w, 1)).ReduceByKey((x, y) => x + y).Map(kv => (double)kv.Value);
 }
Example #5
 public static void Initialize()
 {
     var sparkContext = new SparkContext(null);
     var lines = sparkContext.TextFile(Path.GetTempFileName());
     var words = lines.FlatMap(l => l.Split(' '));
     pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));
 }
Example #6
        /// <summary>
        /// Calculates the word count for the HDFS file.
        /// </summary>
        private static void WordCount()
        {
            var sparkConf = new SparkConf();

            sparkConf.SetAppName("MobiusWordCountC#");
            sparkConf.SetMaster("yarn");
            sparkContext = new SparkContext(sparkConf);
            try
            {
                var lines  = sparkContext.TextFile(hdfsFile);
                var counts = lines
                             .FlatMap(x => x.Split(' '))
                             .Map(w => new Tuple<string, int>(w, 1))
                             .ReduceByKey((x, y) => x + y);
                logger.LogInfo("**********************************************");

                foreach (var wordcount in counts.Collect())
                {
                    Console.WriteLine("{0}: {1}", wordcount.Item1, wordcount.Item2);
                }

                logger.LogInfo("**********************************************");
                logger.LogInfo("Executed Successfully.................");
            }
            catch (Exception ex)
            {
                logger.LogError("Error performing Word Count");
                logger.LogException(ex);
            }
        }
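Note the difference from Examples #3 and #8: this variant builds pairs as Tuple<string, int> and reads results through Item1/Item2, where the other word counts use KeyValuePair<string, int> with Key/Value. The snippets appear to target different revisions of the Mobius pair-RDD API; the pipeline semantics are the same.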
Example #7
        public static void Initialize()
        {
            var sparkContext = new SparkContext(null);
            var lines        = sparkContext.TextFile(Path.GetTempFileName());

            words = lines.FlatMap(l => l.Split(' '));
        }
Example #8
File: Program.cs Project: corba777/Mobius
        public static int Main(string[] args)
        {
            LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); // this is optional - DefaultLoggerService will be used if not set
            Logger = LoggerServiceFactory.GetLogger(typeof(WordCountExample));

            if (args.Length != 1)
            {
                Console.Error.WriteLine("Usage: WordCount  <file>");
                return 1;
            }

            var sparkContext = new SparkContext(new SparkConf().SetAppName("MobiusWordCount"));

            try
            {
                var lines = sparkContext.TextFile(args[0]);
                var counts = lines
                    .FlatMap(x => x.Split(' '))
                    .Map(w => new KeyValuePair<string, int>(w, 1))
                    .ReduceByKey((x, y) => x + y);

                foreach (var wordcount in counts.Collect())
                {
                    Console.WriteLine("{0}: {1}", wordcount.Key, wordcount.Value);
                }
            }
            catch (Exception ex)
            {
                Logger.LogError("Error performing Word Count");
                Logger.LogException(ex);
            }

            sparkContext.Stop();
            return 0;
        }
Example #9
        public void TestSparkContextProxy()
        {
            var sparkContext = new SparkContext("masterUrl", "appName");

            sparkContext.AddFile(null);
            sparkContext.BinaryFiles(null, null);
            sparkContext.CancelAllJobs();
            sparkContext.CancelJobGroup(null);
            sparkContext.EmptyRDD<string>();
            sparkContext.GetLocalProperty(null);
            sparkContext.HadoopFile(null, null, null, null);
            sparkContext.HadoopRDD(null, null, null);
            sparkContext.NewAPIHadoopFile(null, null, null, null);
            sparkContext.NewAPIHadoopRDD(null, null, null);
            sparkContext.Parallelize<int>(new int[] { 1, 2, 3, 4, 5 });
            sparkContext.SequenceFile(null, null, null, null, null, null);
            sparkContext.SetCheckpointDir(null);
            sparkContext.SetJobGroup(null, null);
            sparkContext.SetLocalProperty(null, null);
            sparkContext.SetLogLevel(null);
            sparkContext.TextFile(null);
            sparkContext.WholeTextFiles(null);
            sparkContext.Stop();
            sparkContext.Union<string>(null);
        }
Example #10
        public IEnumerable<Model> Get()
        {
            _sparkContext = Program.SparkContext;
            var crimeDataFrame = GetSqlContext()
                .TextFile(CrimeFilePath)
                .Cache();

            var tempRdd = _sparkContext.TextFile(CrimeFilePath)
                .Map(l => new object[]
                        {
                            int.Parse(l.Substring(0, 3)),
                            int.Parse(l.Substring(4, 3)),
                            int.Parse(l.Substring(8, 4)),
                        });

            var data = GetSqlContext().CreateDataFrame(tempRdd, new StructType(new List<StructField>
                        {
                            new StructField("Field1", new IntegerType()),
                            new StructField("Field2", new IntegerType()),
                            new StructField("Field3", new IntegerType())
                        }));

            data.Show();
            data.RegisterTempTable("data");
            
            return GetSqlContext().Sql("SELECT Field1, Field2, Field3 FROM data")
                .Collect()
                .Select(l => new Model
                {
                    Field1 = l.Get("Field1"),
                    Field2 = l.Get("Field2"),
                    Field3 = l.Get("Field3"),
                }).ToList();
        }
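The Model type is not shown in the source. A plausible sketch, assuming three integer fields matching the schema above (hypothetical, not from the source):

        // Hypothetical sketch of the Model type used above (assumption).
        public class Model
        {
            public int Field1 { get; set; }
            public int Field2 { get; set; }
            public int Field3 { get; set; }
        }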
Example #11
 public void TestRddTextFile()
 {
     var sparkContext = new SparkContext(null);
     var rdd = sparkContext.TextFile(@"c:\path\to\rddinput.txt");
     var paramValuesToTextFileMethod = (rdd.RddProxy as MockRddProxy).mockRddReference as object[];
     Assert.AreEqual(@"c:\path\to\rddinput.txt", paramValuesToTextFileMethod[0]);
     Assert.AreEqual(0, int.Parse(paramValuesToTextFileMethod[1].ToString())); // checking default partitions
 }
Example #12
        public static void Initialize()
        {
            var sparkContext = new SparkContext(null);
            var lines        = sparkContext.TextFile(Path.GetTempFileName());
            var words        = lines.FlatMap(l => l.Split(' '));

            doubles = words.Map(w => new KeyValuePair<string, int>(w, 1)).ReduceByKey((x, y) => x + y).Map(kv => (double)kv.Value);
        }
Example #13
        public static void Initialize()
        {
            var sparkContext = new SparkContext(null);
            var lines        = sparkContext.TextFile(Path.GetTempFileName());
            var words        = lines.FlatMap(l => l.Split(' '));

            pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));
        }
Example #14
 public void TestSparkContextTextFile()
 {
     var sparkContext = new SparkContext(null);
     var rdd = sparkContext.TextFile(@"c:\path\to\rddinput.txt", 8);
     var paramValuesToTextFileMethod = (rdd.RddProxy as MockRddProxy).mockRddReference as object[];
     Assert.AreEqual(@"c:\path\to\rddinput.txt", paramValuesToTextFileMethod[0]);
     Assert.AreEqual(8, paramValuesToTextFileMethod[1]);
 }
Example #15
        public void TestRddTextFile()
        {
            var sparkContext = new SparkContext(null);
            var rdd          = sparkContext.TextFile(@"c:\path\to\rddinput.txt");
            var paramValuesToTextFileMethod = (rdd.RddProxy as MockRddProxy).mockRddReference as object[];

            Assert.AreEqual(@"c:\path\to\rddinput.txt", paramValuesToTextFileMethod[0]);
            Assert.AreEqual(0, int.Parse(paramValuesToTextFileMethod[1].ToString())); // checking default partitions
        }
Example #16
        public void TestSparkContextTextFile()
        {
            var sparkContext = new SparkContext(null);
            var rdd          = sparkContext.TextFile(@"c:\path\to\rddinput.txt", 8);
            var paramValuesToTextFileMethod = (rdd.RddProxy as MockRddProxy).mockRddReference as object[];

            Assert.AreEqual(@"c:\path\to\rddinput.txt", paramValuesToTextFileMethod[0]);
            Assert.AreEqual(8, paramValuesToTextFileMethod[1]);
        }
Example #17
        public void TestGetDefaultPartitionNum()
        {
            var sparkContext = new SparkContext(null);
            var lines        = sparkContext.TextFile(Path.GetTempFileName(), 5);

            words = lines.FlatMap(l => l.Split(' '));

            var defaultNumPartitions = words.GetDefaultPartitionNum();
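            // Judging by the assertion below, the mocked context reports a fixed default of 2 partitions, regardless of the hint (5) passed to TextFile.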

            Assert.AreEqual(2, defaultNumPartitions);
        }
Example #18
        public void TestTextFile()
        {
            RDD<string> rdd = _sc.TextFile($"{TestEnvironment.ResourceDirectory}people.txt");
            var strs = new string[] { "Michael, 29", "Andy, 30", "Justin, 19" };

            Assert.Equal(strs, rdd.Collect());

            // Test a transformation so that SerializedMode is correctly propagated.
            RDD<int> intRdd = rdd.Map(str => 0);

            Assert.Equal(new[] { 0, 0, 0 }, intRdd.Collect());
        }
Example #19
        static void StartOneTest(string pathPattern, int times, int totalTimes)
        {
            var beginTime = DateTime.Now;

            Logger.LogInfo($"Begin test[{times}]-{totalTimes} , will read : {pathPattern} . {GetCurrentProcessInfo()}");
            var sc         = new SparkContext(new SparkConf());
            var mappingRDD = sc.TextFile(pathPattern).Map<string>(line => line).Cache();

            Logger.LogInfo("RDD count = {0}", mappingRDD.Count());

            mappingRDD.Unpersist();
            var endTime = DateTime.Now;

            Logger.LogInfo($"End test[{times}]-{totalTimes} of {typeof(TxtStreamTest)}, used time = {(endTime - beginTime).TotalSeconds} s = {endTime - beginTime} . read = {pathPattern} ; {GetCurrentProcessInfo()}");

            sc.Stop();
        }
Example #20
        public void TestRddMap()
        {
            var sparkContext = new SparkContext(null);
            var rdd = sparkContext.TextFile(@"c:\path\to\rddinput.txt");
            var rdd2 = rdd.Map(s => s.ToLower() + ".com");
            Assert.IsTrue(rdd2.GetType() == typeof(PipelinedRDD<string>));
            var pipelinedRdd = rdd2 as PipelinedRDD<string>;
            var func = pipelinedRdd.func;
            var result = func(1, new String[] { "ABC" });
            var output = result.First();
            Assert.AreEqual("ABC".ToLower() + ".com", output);

            var pipelinedRdd2 = rdd2.Map(s => "HTTP://" + s) as PipelinedRDD<string>;
            var func2 = pipelinedRdd2.func;
            var result2 = func2(1, new String[] { "ABC" });
            var output2 = result2.First();
            Assert.AreEqual("HTTP://" + ("ABC".ToLower() + ".com"), output2); //tolower and ".com" appended first before adding prefix due to the way func2 wraps func in implementation
        }
Example #21
        public void TestRddMap()
        {
            var sparkContext = new SparkContext(null);
            var rdd          = sparkContext.TextFile(@"c:\path\to\rddinput.txt");
            var rdd2         = rdd.Map(s => s.ToLower() + ".com");

            Assert.IsTrue(rdd2.GetType() == typeof(PipelinedRDD<string>));
            var pipelinedRdd = rdd2 as PipelinedRDD<string>;
            var func         = pipelinedRdd.func;
            var result       = func(1, new String[] { "ABC" });
            var output       = result.First();

            Assert.AreEqual("ABC".ToLower() + ".com", output);

            var pipelinedRdd2 = rdd2.Map(s => "HTTP://" + s) as PipelinedRDD<string>;
            var func2         = pipelinedRdd2.func;
            var result2       = func2(1, new String[] { "ABC" });
            var output2       = result2.First();

            Assert.AreEqual("HTTP://" + ("ABC".ToLower() + ".com"), output2); //tolower and ".com" appended first before adding prefix due to the way func2 wraps func in implementation
        }
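The wrapping called out in the final comment can be reproduced without Spark. A minimal sketch (illustration only) of how the second Map composes around the first:

        using System;
        using System.Collections.Generic;
        using System.Linq;

        public static class PipelineComposition
        {
            public static void Main()
            {
                // First Map: lower-case and append ".com".
                Func<int, IEnumerable<string>, IEnumerable<string>> func =
                    (pid, input) => input.Select(s => s.ToLower() + ".com");

                // Second Map wraps the first, so func runs before the prefix is added.
                Func<int, IEnumerable<string>, IEnumerable<string>> func2 =
                    (pid, input) => func(pid, input).Select(s => "HTTP://" + s);

                Console.WriteLine(func2(1, new[] { "ABC" }).First()); // prints HTTP://abc.com
            }
        }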
Example #22
 public void TestSparkContextProxy()
 {
     var sparkContext = new SparkContext("masterUrl", "appName");
     sparkContext.AddFile(null);
     sparkContext.BinaryFiles(null, null);
     sparkContext.CancelAllJobs();
     sparkContext.CancelJobGroup(null);
     sparkContext.EmptyRDD<string>();
     sparkContext.GetLocalProperty(null);
     sparkContext.HadoopFile(null, null, null, null);
     sparkContext.HadoopRDD(null, null, null);
     sparkContext.NewAPIHadoopFile(null, null, null, null);
     sparkContext.NewAPIHadoopRDD(null, null, null);
     sparkContext.Parallelize<int>(new int[] { 1, 2, 3, 4, 5 });
     sparkContext.SequenceFile(null, null, null, null, null, null);
     sparkContext.SetCheckpointDir(null);
     sparkContext.SetJobGroup(null, null);
     sparkContext.SetLocalProperty(null, null);
     sparkContext.SetLogLevel(null);
     sparkContext.TextFile(null);
     sparkContext.WholeTextFiles(null);
     sparkContext.Stop();
     sparkContext.Union<string>(null);
 }
Example #23
 private static RDD<string> getDataFromFile(SparkContext sc, string filename)
 {
     return sc.TextFile(filename).Filter(line => !line.StartsWith("#"));
 }
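A hypothetical call site for the helper above (path and app name are placeholders):

 // Hypothetical usage - the input path and app name are illustrative only.
 var sc = new SparkContext(new SparkConf().SetAppName("DataLoader"));
 RDD<string> data = getDataFromFile(sc, "hdfs:///data/input.txt");
 System.Console.WriteLine("Non-comment lines: {0}", data.Count());
 sc.Stop();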
Example #24
 public static void Initialize()
 {
     var sparkContext = new SparkContext(null);
     var lines = sparkContext.TextFile(Path.GetTempFileName());
     words = lines.FlatMap(l => l.Split(' '));
 }
Example #25
File: RDDTest.cs Project: corba777/Mobius
        public void TestGetDefaultPartitionNum()
        {
            var sparkContext = new SparkContext(null);
            var lines = sparkContext.TextFile(Path.GetTempFileName(), 5);
            words = lines.FlatMap(l => l.Split(' '));

            var defaultNumPartitions = words.GetDefaultPartitionNum();
            Assert.AreEqual(2, defaultNumPartitions);
        }