/// <summary>
/// Builds a row from raw column values and the schema that describes them,
/// then calls <c>Initialize()</c>.
/// </summary>
/// <param name="data">
/// The column values, supplied either as an <c>object[]</c> (used as-is, not copied)
/// or as a <c>List&lt;object&gt;</c> (copied into a new array).
/// </param>
/// <param name="schema">
/// The row schema; its <c>columns</c> collection must contain exactly one entry
/// per value in <paramref name="data"/>.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="data"/> or <paramref name="schema"/> is null.
/// </exception>
/// <exception cref="Exception">
/// <paramref name="data"/> is neither an <c>object[]</c> nor a <c>List&lt;object&gt;</c>,
/// or the number of values differs from the number of schema columns.
/// </exception>
internal RowImpl(object data, RowSchema schema)
{
    // Fail with a descriptive exception instead of a NullReferenceException
    // raised from data.GetType() or schema.columns further down.
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (schema == null)
    {
        throw new ArgumentNullException("schema");
    }

    // A single "as" cast per candidate type replaces the is-then-as double cast.
    object[] array = data as object[];
    if (array != null)
    {
        values = array;
    }
    else
    {
        List<object> list = data as List<object>;
        if (list == null)
        {
            throw new Exception(string.Format("unexpected type {0}", data.GetType()));
        }

        values = list.ToArray();
    }

    this.schema = schema;

    // values is an array, so Length is used rather than the LINQ Count() extension.
    columnCount = values.Length;
    int schemaColumnCount = this.schema.columns.Count();
    if (columnCount != schemaColumnCount)
    {
        throw new Exception(string.Format("column count inferred from data ({0}) and schema ({1}) mismatch", columnCount, schemaColumnCount));
    }

    Initialize();
}
/// <summary>
/// Collects every row of this DataFrame and yields each one as a <see cref="Row"/>.
/// </summary>
/// <remarks>
/// This is an iterator method: none of the work below starts until the returned
/// sequence is enumerated.
/// </remarks>
/// <returns>
/// A sequence with one <c>RowImpl</c> per item produced by collecting the underlying RDD.
/// </returns>
public IEnumerable<Row> Collect()
{
    // Parse the row schema from the DataFrame's JSON schema on first use and keep it in rowSchema.
    if (rowSchema == null)
    {
        rowSchema = RowSchema.ParseRowSchemaFromJson(Schema.ToJson());
    }

    IRDDProxy proxy = dataFrameProxy.JavaToCSharp();
    RDD<Row> rowRdd = new RDD<Row>(proxy, sparkContext, SerializedMode.Row);

    // CollectAndServe returns the port that is then handed to RDD.Collect.
    int servePort = proxy.CollectAndServe();

    foreach (var rawRow in rowRdd.Collect(servePort))
    {
        yield return new RowImpl(rawRow, rowSchema);
    }
}
/// <summary>
/// Builds a <c>ColumnSchema</c> from the JSON description of a single field.
/// </summary>
/// <param name="json">
/// JSON object text that is read for its <c>name</c>, <c>nullable</c> and <c>type</c> members.
/// </param>
/// <returns>
/// A column schema whose <c>type</c> is constructed directly from the type string when
/// <c>type</c> is a JSON string, and parsed via <c>RowSchema.ParseRowSchemaFromJson</c>
/// for any other token kind.
/// </returns>
internal static ColumnSchema ParseColumnSchemaFromJson(string json)
{
    ColumnSchema column = new ColumnSchema();
    JObject field = JObject.Parse(json);

    column.name = field["name"].ToString();
    column.nullable = (bool)field["nullable"];

    // A string token is handed to the RowSchema constructor as-is; anything else
    // is treated as the JSON of a nested schema and parsed.
    JToken typeToken = field["type"];
    bool isStringType = typeToken.Type == JTokenType.String;
    string typeText = typeToken.ToString();
    column.type = isStringType
        ? new RowSchema(typeText)
        : RowSchema.ParseRowSchemaFromJson(typeText);

    return column;
}