/// <summary>
/// Sample that loads the requests log text file into a DataFrame using an explicit schema,
/// registers it as the temp table "requests", and queries it with SQL for a single guid.
/// Prints the schema and rows of both DataFrames and, when validation is enabled in the
/// sample configuration, asserts the expected row counts (10 in total, 1 for the guid).
/// </summary>
internal static void DFTextFileLoadDataFrameSample()
{
    // Every column of the requests log is declared with the "string" type.
    var schemaFields = new List<StructField>
    {
        StructField.CreateStructField("guid", "string", false),
        StructField.CreateStructField("datacenter", "string", false),
        StructField.CreateStructField("abtestid", "string", false),
        StructField.CreateStructField("traffictype", "string", false)
    };
    var schema = StructType.CreateStructType(schemaFields);

    // Load the text file with the schema and expose it to SQL under the name "requests".
    var loadContext = GetSqlContext();
    var requestsPath = SparkCLRSamples.Configuration.GetInputDataPath(RequestsLog);
    var allRequests = loadContext.TextFile(requestsPath, schema);
    allRequests.RegisterTempTable("requests");

    // Select the guid and datacenter columns for one specific request guid.
    var queryContext = GetSqlContext();
    var requestsForGuid = queryContext.Sql("SELECT guid, datacenter FROM requests where guid = '4628deca-139d-4121-b540-8341b9c05c2a'");
    requestsForGuid.Show();

    // Full data set: schema, rows, and row count.
    allRequests.ShowSchema();
    allRequests.Show();
    var count = allRequests.Count();

    // Filtered data set: schema, rows, and row count.
    requestsForGuid.ShowSchema();
    requestsForGuid.Show();
    var filteredCount = requestsForGuid.Count();

    if (!SparkCLRSamples.Configuration.IsValidationEnabled)
    {
        return;
    }

    Assert.AreEqual(10, count);
    Assert.AreEqual(1, filteredCount);
}
/// <summary>
/// Builds a DataFrame over the metrics log text file using an explicit seven-column schema.
/// </summary>
/// <returns>
/// The DataFrame returned by <c>TextFile</c> for the metrics log input path and the schema
/// declared here.
/// </returns>
private static DataFrame GetMetricsDataFrame()
{
    // All columns are declared as "string" except "latency", which is declared as "integer".
    var schemaFields = new List<StructField>
    {
        StructField.CreateStructField("unknown", "string", false),
        StructField.CreateStructField("date", "string", false),
        StructField.CreateStructField("time", "string", false),
        StructField.CreateStructField("guid", "string", false),
        StructField.CreateStructField("lang", "string", false),
        StructField.CreateStructField("country", "string", false),
        StructField.CreateStructField("latency", "integer", false)
    };
    var schema = StructType.CreateStructType(schemaFields);

    var sqlContext = GetSqlContext();
    var metricsPath = SparkCLRSamples.Configuration.GetInputDataPath(MetricsLog);
    return sqlContext.TextFile(metricsPath, schema);
}