/// <summary>
/// Builds a <see cref="Row"/> from the schema JSON held in <c>Schema</c> and the raw
/// column data held in <c>Values</c>, then clears the cached schema state.
/// </summary>
/// <returns>
/// A <c>RowImpl</c> created from <c>GetValues(Values)</c> and the parsed schema.
/// </returns>
public Row GetRow()
{
    StructType rowSchema = DataType.ParseDataTypeFromJson(Schema) as StructType;

    // The row must be built before the schema state below is cleared.
    Row result = new RowImpl(GetValues(Values), rowSchema);

    // Clear the cached schema so that rows coming from different DataFrames can be
    // handled within the same AppDomain. The next row carries its own schema, so
    // dropping the cached one here is safe.
    isCurrentSchemaSet = false;
    currentSchema = null;

    return result;
}
/// <summary>
/// Walks the schema columns in order and, for every column whose type itself declares
/// columns, replaces the non-null value at the matching position in <c>values</c> with
/// a nested <c>RowImpl</c> built from that value and the column type.
/// </summary>
private void Initialize()
{
    // Starts at -1 because the position is advanced at the top of each iteration,
    // which keeps it in step with the schema column even when an iteration bails out early.
    int position = -1;
    foreach (var column in schema.columns)
    {
        position++;

        // Only a column whose type has columns of its own represents a sub-row.
        if (!column.type.columns.Any())
        {
            continue;
        }

        // A null sub-row stays null.
        object current = values[position];
        if (current == null)
        {
            continue;
        }

        values[position] = new RowImpl(values[position], column.type);
    }
}
/// <summary>
/// Normalizes the raw entries of <c>values</c> in place, field by field, according to
/// the data type each field declares in <c>schema.Fields</c>:
/// array values are converted through a nested <c>RowImpl</c>, struct values are
/// wrapped in a nested <c>RowImpl</c>, string values are converted with
/// <c>ToString()</c>, and values of any other supported type are left untouched.
/// Null array, struct and string values are left as null.
/// </summary>
/// <exception cref="ArgumentException">
/// A non-null value of an array field is neither an <see cref="ArrayList"/> nor an <c>object[]</c>.
/// </exception>
/// <exception cref="NotImplementedException">
/// The schema contains a map, decimal or date field; these are not handled yet.
/// </exception>
private void Initialize()
{
    int index = 0;
    foreach (var field in schema.Fields)
    {
        if (field.DataType is ArrayType)
        {
            var elementType = ((ArrayType)field.DataType).ElementType;
            object rawValue = values[index];

            // A null array is passed through unchanged, the same way null struct and
            // string values are handled in the branches below.
            if (rawValue != null)
            {
                // Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)),
                // then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So the value should be of type ArrayList;
                // In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[].
                var listValue = rawValue as ArrayList;
                object[] valueOfArray = listValue != null ? listValue.ToArray() : rawValue as object[];
                if (valueOfArray == null)
                {
                    throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name);
                }

                // The elements are run through a nested RowImpl so they get the same
                // per-type conversion as top-level values; only the converted value
                // array is kept, not the RowImpl wrapper.
                // NOTE(review): when the element type is a StructType, that struct schema is
                // used as the schema of the whole element array - confirm this is intended.
                var arraySchema = elementType as StructType
                    ?? CreateArrayElementSchema(elementType, valueOfArray.Length);
                values[index] = new RowImpl(valueOfArray, arraySchema).values;
            }
        }
        else if (field.DataType is MapType)
        {
            //TODO
            throw new NotImplementedException();
        }
        else if (field.DataType is StructType)
        {
            if (values[index] != null)
            {
                values[index] = new RowImpl(values[index], field.DataType as StructType);
            }
        }
        else if (field.DataType is DecimalType)
        {
            //TODO
            throw new NotImplementedException();
        }
        else if (field.DataType is DateType)
        {
            //TODO
            throw new NotImplementedException();
        }
        else if (field.DataType is StringType)
        {
            if (values[index] != null)
            {
                values[index] = values[index].ToString();
            }
        }

        // Values of every other type are already in their final form.
        index++;
    }
}

/// <summary>
/// Builds a synthetic struct type describing an array of <paramref name="length"/>
/// elements: one field per element, named <c>_array_0</c>, <c>_array_1</c>, ...,
/// each of type <paramref name="elementType"/>.
/// </summary>
/// <param name="elementType">Data type shared by all elements of the array.</param>
/// <param name="length">Number of elements in the array.</param>
/// <returns>A struct type with <paramref name="length"/> fields.</returns>
private static StructType CreateArrayElementSchema(DataType elementType, int length)
{
    StructField[] fields = new StructField[length];
    for (int i = 0; i < length; i++)
    {
        fields[i] = new StructField(string.Format("_array_{0}", i), elementType);
    }

    return new StructType(fields);
}