Exemplo n.º 1
0
Arquivo: Utils.cs Projeto: xyuan/Dryad
        /// <summary>
        /// Builds the integer query used as the group-by/reduce data set.
        /// </summary>
        /// <param name="context">Context from which the seed query is created.</param>
        /// <returns>
        /// A query that expands the keys 0 and 1 into 100 consecutive integers each:
        /// 1..100 for key 0 and 101..200 for key 1.
        /// </returns>
        public static IQueryable<int> GetGroupByReduceDataSet(DryadLinqContext context)
        {
            const int partitionCount = 2;
            const int valuesPerPartition = 100;

            // The one-element array only seeds the query; Apply replaces it with the keys {0, 1}.
            IQueryable<int> seed = context.FromEnumerable(new int[1]);
            IQueryable<int> keys = seed.Apply(x => Enumerable.Range(0, partitionCount));

            // Hash the keys into two partitions.
            IQueryable<int> partitionedKeys = keys.HashPartition(x => x, partitionCount);

            // Each key k is expanded into the run k * 100 + 1 .. k * 100 + 100.
            IQueryable<int> values = partitionedKeys.SelectMany(
                x => Enumerable.Range(x * valuesPerPartition + 1, valuesPerPartition));

            return values;
        }
Exemplo n.º 2
0
Arquivo: Utils.cs Projeto: xyuan/Dryad
        /// <summary>
        /// Builds the integer query used as the range-partition data set.
        /// </summary>
        /// <param name="context">Context from which the seed query is created.</param>
        /// <returns>
        /// A query that expands the keys 0, 1 and 2 into 1000 consecutive integers each:
        /// 1..1000, 20001..21000 and 40001..41000.
        /// </returns>
        public static IQueryable<int> GetRangePartitionDataSet(DryadLinqContext context)
        {
            // We need a lot of data to ensure the sampler will get some of it;
            // a few thousand values should suffice.
            const int partitionCount = 3;
            const int partitionStride = 20000;
            const int valuesPerPartition = 1000;

            // The one-element array only seeds the query; Apply replaces it with the keys {0, 1, 2}.
            IQueryable<int> seed = context.FromEnumerable(new int[1]);
            IQueryable<int> keys = seed.Apply(x => Enumerable.Range(0, partitionCount));

            // Hash the keys into three partitions.
            IQueryable<int> partitionedKeys = keys.HashPartition(x => x, partitionCount);

            // Each key k is expanded into 1000 values starting at k * 20000 + 1.
            IQueryable<int> values = partitionedKeys.SelectMany(
                x => Enumerable.Range(x * partitionStride + 1, valuesPerPartition));

            return values;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Word-count sample: splits every input line on single spaces, counts the
        /// occurrences of each distinct word and emits one "word: count" record per word.
        /// </summary>
        /// <remarks>
        /// When the <c>local</c> compilation symbol is defined, the job runs on the local
        /// computer over a single dummy line and the results are written to the console.
        /// Otherwise an Azure-based computation reads the davinci.txt sample and stores the
        /// results in "wc-out.txt"; the placeholder names must be replaced first.
        /// </remarks>
        public static void WordCountExample()
        {
#if local
            // This overload runs the computation on your local computer using a single worker.
            var context = new DryadLinqContext(1);

            // Inputs can be created from any IEnumerable source using FromEnumerable.
            var source = context.FromEnumerable(
                new LineRecord[] { new LineRecord("This is a dummy line for a short job") });
#else
            // To use the davinci.txt example input below, select your cluster's default
            // storage account and container, which automatically includes the sample text.
            string accountName = "Replace with a storage account name";
            string containerName = "Replace with a storage container name";
            string clusterName = "Replace with your HDInsight 3.0 cluster name";

            // This overload creates an Azure-based computation.
            var context = new DryadLinqContext(clusterName);
            context.JobFriendlyName = "DryadLINQ Sample Wordcount";

            // Plain text files should be read as type LineRecord.
            var inputUri = AzureUtils.ToAzureUri(accountName, containerName,
                                                 "example/data/gutenberg/davinci.txt");
            var source = context.FromStore<LineRecord>(inputUri);
#endif

            // words -> groups keyed by word -> (word, count) pairs -> formatted records
            var formatted = source
                .SelectMany(x => x.Line.Split(' '))
                .GroupBy(x => x)
                .Select(x => new KeyValuePair<string, int>(x.Key, x.Count()))
                .Select(x => new LineRecord(String.Format("{0}: {1}", x.Key, x.Value)));

#if local
            // Any collection computed by the query can be materialized back at the client,
            // not just the 'output' collection. For large collections this is expensive!
            foreach (LineRecord record in formatted)
            {
                Console.WriteLine(record.Line);
            }
#else
            // The 'true' parameter to ToStore means the output will be over-written if you
            // run the job more than once.
            var outputUri = AzureUtils.ToAzureUri(accountName, containerName, "wc-out.txt");
            var jobInfo = formatted.ToStore(outputUri, true).SubmitAndWait();
#endif
        }
Exemplo n.º 4
0
        /// <summary>
        /// Counts how often each space-separated word occurs in the input lines and
        /// produces one record of the form "word: count" for every distinct word.
        /// </summary>
        /// <remarks>
        /// Compiled with <c>local</c>: runs on the local computer against one dummy line and
        /// prints the records. Compiled without it: creates an Azure-based computation that
        /// reads the davinci.txt sample and writes the records to "wc-out.txt".
        /// </remarks>
        public static void WordCountExample()
        {
#if local
            // Single-worker computation on the local computer.
            var linqContext = new DryadLinqContext(1);

            // Any IEnumerable source can be turned into an input this way.
            var dummyLines = new LineRecord[] { new LineRecord("This is a dummy line for a short job") };
            var records = linqContext.FromEnumerable(dummyLines);
#else
            string clusterName = "Replace with your HDInsight 3.0 cluster name";

            // The cluster's default storage account and container automatically include
            // the davinci.txt sample text read below.
            string accountName = "Replace with a storage account name";
            string containerName = "Replace with a storage container name";

            // Azure-based computation.
            var linqContext = new DryadLinqContext(clusterName);
            linqContext.JobFriendlyName = "DryadLINQ Sample Wordcount";

            // Plain text files are read as LineRecord.
            var records = linqContext.FromStore<LineRecord>(
                AzureUtils.ToAzureUri(accountName, containerName, "example/data/gutenberg/davinci.txt"));
#endif

            var tokens = records.SelectMany(x => x.Line.Split(' '));
            var byWord = tokens.GroupBy(x => x);
            var tallies = byWord.Select(x => new KeyValuePair<string, int>(x.Key, x.Count()));
            var report = tallies.Select(x => new LineRecord(String.Format("{0}: {1}", x.Key, x.Value)));

#if local
            // Enumerating a query materializes it at the client; this works for any
            // collection in the query but is expensive for large ones.
            foreach (LineRecord entry in report)
            {
                Console.WriteLine(entry.Line);
            }
#else
            // Passing 'true' to ToStore over-writes the output when the job is run again.
            var destination = AzureUtils.ToAzureUri(accountName, containerName, "wc-out.txt");
            var submission = report.ToStore(destination, true).SubmitAndWait();
#endif
        }
Exemplo n.º 5
0
Arquivo: Utils.cs Projeto: xyuan/Dryad
        /// <summary>
        /// Builds the small integer query used as the "simple file" data set.
        /// </summary>
        /// <param name="context">Context from which the seed query is created.</param>
        /// <returns>
        /// A query that expands the keys 0, 1 and 2 into four consecutive integers each:
        /// 0..3, 4..7 and 8..11.
        /// </returns>
        public static IQueryable<int> GetSimpleFileSets(DryadLinqContext context)
        {
            const int partitionCount = 3;
            const int valuesPerPartition = 4;

            // The one-element array only seeds the query; Apply replaces it with the keys {0, 1, 2}.
            IQueryable<int> seed = context.FromEnumerable(new int[1]);
            IQueryable<int> keys = seed.Apply(x => Enumerable.Range(0, partitionCount));

            // Hash the keys into three partitions.
            IQueryable<int> partitionedKeys = keys.HashPartition(x => x, partitionCount);

            // Each key k is expanded into the run k * 4 .. k * 4 + 3.
            IQueryable<int> values = partitionedKeys.SelectMany(
                x => Enumerable.Range(x * valuesPerPartition, valuesPerPartition));

            return values;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Word-count sample: splits every input line on single spaces, counts the
        /// occurrences of each distinct word and emits one "word: count" record per word.
        /// </summary>
        /// <remarks>
        /// The execution target is selected at compile time:
        /// <list type="bullet">
        /// <item><description><c>local</c>: single worker on the local computer, dummy input, console output.</description></item>
        /// <item><description><c>azure</c> (without <c>local</c>): Azure-based computation that reads the
        /// davinci.txt sample and stores the result in "wc-out.txt".</description></item>
        /// <item><description>neither: YARN cluster, dummy input, console output.</description></item>
        /// </list>
        /// <c>local</c> takes precedence when both symbols are defined. The output section
        /// uses the same precedence as the input section, so it only references the Azure
        /// storage names in builds that actually declare them.
        /// </remarks>
        public static void WordCountExample()
        {
#if local
            // This overload runs the computation on your local computer using a single worker
            var config = new DryadLinqContext(1);

            var lines = new LineRecord[] { new LineRecord("This is a dummy line for a short job") };
            // You can create inputs from any IEnumerable source using this method
            var input = config.FromEnumerable(lines);
#elif azure
            string clusterName = "Replace with your HDInsight 3.1 cluster name";
            // to use the davinci.txt example input below, select your cluster's default
            // storage account and container, which automatically includes the sample text
            string accountName = "Replace with a storage account name";
            string containerName = "Replace with a storage container name";

            // This overload creates an Azure-based computation
            var config = new DryadLinqContext(clusterName);
            config.JobFriendlyName = "DryadLINQ Sample Wordcount";

            // plain text files should be read as type LineRecord
            var input = config.FromStore<LineRecord>(Utils.ToAzureUri(accountName, containerName,
                                                     "example/data/gutenberg/davinci.txt"));
#else
            // to use a yarn cluster, fill in the username, resource node machine name and port, and name node and hdfs port below (use -1 for the default hdfs port).
            string user = "******";
            string resourceNode = "Replace with the name of the computer your resource node is running on";
            int rmPort = 8088;
            string nameNode = "Replace with the name of the computer your name node is running on";
            int hdfsPort = -1;
            // set the YARN queue to submit your job on below. Leave null to use the default queue
            string queue = null;
            // set the number of worker containers to start for the DryadLINQ job below
            int numberOfWorkers = 2;
            // set the amount of memory requested for the DryadLINQ job manager container below: 8GB should be enough for even the largest jobs, and 2GB will normally suffice
            int amMemoryMB = 2000;
            // set the amount of memory requested for the DryadLINQ worker containers below. The amount needed will depend on the code you are running
            int workerMemoryMB = 8000;
            // Describe the YARN cluster the job is submitted to, using the settings above
            var cluster = new DryadLinqYarnCluster(user, numberOfWorkers, amMemoryMB, workerMemoryMB, queue, resourceNode, rmPort, nameNode, hdfsPort);

            // This overload creates a computation that runs on the cluster described above
            var config = new DryadLinqContext(cluster);

            var lines = new LineRecord[] { new LineRecord("This is a dummy line for a short job") };
            // You can create inputs from any IEnumerable source using this method
            var input = config.FromEnumerable(lines);
#endif

            var words = input.SelectMany(x => x.Line.Split(' '));
            var groups = words.GroupBy(x => x);
            var counts = groups.Select(x => new KeyValuePair<string, int>(x.Key, x.Count()));
            var toOutput = counts.Select(x => new LineRecord(String.Format("{0}: {1}", x.Key, x.Value)));

            // Must mirror the input selection above: accountName and containerName exist
            // only in the azure-without-local build, so guard on exactly that combination.
#if azure && !local
            // the 'true' parameter to ToStore means the output will be over-written if you run
            // the job more than once
            var info = toOutput.ToStore(Utils.ToAzureUri(accountName, containerName,
                                        "wc-out.txt"), true).SubmitAndWait();
#else
            // any collection computed by the query can be materialized back at the client,
            // not just the 'output' collection. For large collections this is expensive!
            foreach (LineRecord line in toOutput)
            {
                Console.WriteLine(line.Line);
            }
#endif
        }
Exemplo n.º 7
0
        /// <summary>
        /// Test: stores a plain in-memory int array through a DryadLINQ query, submits the
        /// job, waits for it, and checks that the output file exists.
        /// </summary>
        /// <param name="context">Context used to build and submit the query; its LocalDebug flag is set to false.</param>
        /// <returns>
        /// The result of the output-file check, or <c>false</c> if any exception was thrown.
        /// </returns>
        public static bool PlainEnumerableAsDryadQueryToStoreSubmit(DryadLinqContext context)
        {
            const string testName = "PlainEnumerableAsDryadQueryToStoreSubmit";
            const string outFile = "unittest/output/PlainEnumerableAsDryadQueryToStoreSubmit";

            TestLog.TestStart(testName);

            bool passed;
            try
            {
                context.LocalDebug = false;

                var plainData = new[] { 5, 6, 7 };
                var destination = AzureUtils.ToAzureUri(Config.accountName, Config.containerName, outFile);
                var storeQuery = context.FromEnumerable(plainData).ToStore(destination);

                // Submit the job and block until it completes.
                DryadLinqJobInfo jobInfo = storeQuery.Submit();
                jobInfo.Wait();

                passed = Validate.outFileExists(outFile);
            }
            catch (Exception ex)
            {
                // Any failure is logged and counts as a failed test rather than propagating.
                TestLog.Message("Error: " + ex.Message);
                passed = false;
            }

            TestLog.LogResult(new TestResult(testName, context, passed));
            return passed;
        }