示例#1
0
        /// <summary>
        /// Sample: loads the requests log text file into a DataFrame using an explicit
        /// four-column string schema, registers it as the temp table "requests", and
        /// runs a SQL query against it that filters on a single guid.
        /// </summary>
        /// <remarks>
        /// Both the full and the filtered DataFrame have their schema and contents shown
        /// and their rows counted. When
        /// <c>SparkCLRSamples.Configuration.IsValidationEnabled</c> is set, the sample
        /// asserts that the full DataFrame has 10 rows and the filtered one has 1.
        /// </remarks>
        internal static void DFTextFileLoadDataFrameSample()
        {
            const string guidQuery = "SELECT guid, datacenter FROM requests where guid = '4628deca-139d-4121-b540-8341b9c05c2a'";

            // Column layout of the requests log; every column is a non-nullable string.
            var requestFields = new List<StructField>
            {
                StructField.CreateStructField("guid", "string", false),
                StructField.CreateStructField("datacenter", "string", false),
                StructField.CreateStructField("abtestid", "string", false),
                StructField.CreateStructField("traffictype", "string", false),
            };
            var requestsSchema = StructType.CreateStructType(requestFields);

            var allRequests = GetSqlContext().TextFile(
                SparkCLRSamples.Configuration.GetInputDataPath(RequestsLog),
                requestsSchema);

            // The SQL query below refers to the DataFrame by this table name.
            allRequests.RegisterTempTable("requests");

            var requestsForGuid = GetSqlContext().Sql(guidQuery);
            requestsForGuid.Show();

            allRequests.ShowSchema();
            allRequests.Show();
            var totalRows = allRequests.Count();

            requestsForGuid.ShowSchema();
            requestsForGuid.Show();
            var matchingRows = requestsForGuid.Count();

            if (!SparkCLRSamples.Configuration.IsValidationEnabled)
            {
                return;
            }

            Assert.AreEqual(10, totalRows);
            Assert.AreEqual(1, matchingRows);
        }
示例#2
0
        /// <summary>
        /// Loads the metrics log text file into a DataFrame using an explicit schema.
        /// </summary>
        /// <returns>
        /// The DataFrame produced by <c>GetSqlContext().TextFile</c> for the metrics log
        /// input path, with columns unknown, date, time, guid, lang, country (strings)
        /// and latency (integer), all declared non-nullable.
        /// </returns>
        private static DataFrame GetMetricsDataFrame()
        {
            // Column layout of the metrics log, in file order.
            var metricFields = new List<StructField>
            {
                StructField.CreateStructField("unknown", "string", false),
                StructField.CreateStructField("date", "string", false),
                StructField.CreateStructField("time", "string", false),
                StructField.CreateStructField("guid", "string", false),
                StructField.CreateStructField("lang", "string", false),
                StructField.CreateStructField("country", "string", false),
                StructField.CreateStructField("latency", "integer", false),
            };
            var metricsSchema = StructType.CreateStructType(metricFields);

            return GetSqlContext().TextFile(
                SparkCLRSamples.Configuration.GetInputDataPath(MetricsLog),
                metricsSchema);
        }