/// <summary>
/// Normalizes the raw entries of <c>values</c> in place, visiting <c>schema.Fields</c> in order
/// and treating the entry at the same position as that field's value:
/// array values are replaced by the value array of a nested <c>RowImpl</c>, non-null struct values
/// are replaced by a nested <c>RowImpl</c>, non-null string values are replaced by their
/// <c>ToString()</c> result, and values of any other supported type are left untouched.
/// </summary>
/// <exception cref="ArgumentException">
/// An array-typed field holds a value that is neither an <c>ArrayList</c> nor an <c>object[]</c>
/// (a null value falls into this case as well).
/// </exception>
/// <exception cref="NotImplementedException">
/// The schema contains a field of MapType, DecimalType or DateType.
/// </exception>
private void Initialize()
{
    // Builds a synthetic struct schema with `length` fields named "_array_0", "_array_1", ...,
    // all of the given element type, so that an array of non-struct elements can be fed to RowImpl.
    // Declared once here because it does not depend on the field being processed.
    Func<DataType, int, StructType> convertArrayTypeToStructTypeFunc = (dataType, length) =>
    {
        var elementFields = new StructField[length];
        for (int i = 0; i < length; i++)
        {
            elementFields[i] = new StructField(string.Format("_array_{0}", i), dataType);
        }
        return new StructType(elementFields);
    };

    int index = 0;
    foreach (var field in schema.Fields)
    {
        var fieldType = field.DataType;

        if (fieldType is ArrayType)
        {
            var elementType = (fieldType as ArrayType).ElementType;

            // Note: When creating object from json, PySpark converts Json array to Python List
            // (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)),
            // then Pyrolite unpickler converts Python List to C# ArrayList
            // (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So values[index] should be of type ArrayList.
            // In case Python changes its implementation, which means value is not of type ArrayList,
            // try cast to object[] because Pyrolite unpickler converts Python Tuple to C# object[].
            var arrayList = values[index] as ArrayList;
            object[] valueOfArray = arrayList != null ? arrayList.ToArray() : values[index] as object[];
            if (valueOfArray == null)
            {
                // NOTE(review): a null array value also ends up here and throws; confirm whether
                // nullable array columns are expected to pass through instead.
                throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name);
            }

            // Use the element type itself as the schema when it is a struct; otherwise synthesize one.
            values[index] = new RowImpl(
                valueOfArray,
                elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).values;
        }
        else if (fieldType is MapType)
        {
            //TODO
            throw new NotImplementedException();
        }
        else if (fieldType is StructType)
        {
            // Null struct values stay null; everything else is wrapped in a nested row.
            if (values[index] != null)
            {
                values[index] = new RowImpl(values[index], fieldType as StructType);
            }
        }
        else if (fieldType is DecimalType)
        {
            //TODO
            throw new NotImplementedException();
        }
        else if (fieldType is DateType)
        {
            //TODO
            throw new NotImplementedException();
        }
        else if (fieldType is StringType)
        {
            if (values[index] != null)
            {
                values[index] = values[index].ToString();
            }
        }
        // Any other type: the value is kept exactly as it is.

        index++;
    }
}
/// <summary>
/// Builds one converter delegate per entry of <c>fields</c>, stored at the same position as the field.
/// Each converter takes a raw value and returns the value to use for that field:
/// string fields call <c>ToString()</c> on non-null input, array fields return the value array of a
/// nested <c>RowImpl</c>, struct fields wrap non-null input in a nested <c>RowImpl</c>, and every
/// other field type gets an identity converter.
/// </summary>
/// <returns>An array of <c>fields.Count</c> converters, in field order.</returns>
/// <exception cref="NotImplementedException">
/// Thrown while building the converters if any field is of MapType.
/// </exception>
private Func<dynamic, dynamic>[] ConstructPickleConverters()
{
    var converters = new Func<dynamic, dynamic>[fields.Count];
    int slot = 0;

    foreach (var field in fields)
    {
        Func<dynamic, dynamic> converter;

        // LongType and DateType have no dedicated converter (earlier attempts were disabled),
        // so they end up with the identity converter of the final branch.
        if (field.DataType is StringType)
        {
            converter = raw => raw?.ToString();
        }
        else if (field.DataType is ArrayType)
        {
            // Creates a struct schema of `count` fields named "_array_0", "_array_1", ...,
            // each of type `itemType`, used when the array elements are not structs themselves.
            Func<DataType, int, StructType> buildElementSchema = (itemType, count) =>
            {
                var generated = new StructField[count];
                int position = 0;
                while (position < count)
                {
                    generated[position] = new StructField(string.Format("_array_{0}", position), itemType);
                    position++;
                }
                return new StructType(generated);
            };

            var elementType = (field.DataType as ArrayType).ElementType;

            converter = raw =>
            {
                // Note: When creating object from json, PySpark converts Json array to Python List
                // (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)),
                // then Pyrolite unpickler converts Python List to C# ArrayList
                // (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt), so the input should be an ArrayList.
                // Should Python change its implementation, fall back to object[], which is what the
                // Pyrolite unpickler produces for a Python Tuple.
                var asList = raw as ArrayList;
                object[] valueOfArray = asList != null ? asList.ToArray() : raw as object[];

                if (valueOfArray == null)
                {
                    // Reached for null input as well as for any unsupported runtime type.
                    throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name);
                }

                // TODO: this part may have some problems, not verified
                var nested = new RowImpl(
                    valueOfArray,
                    elementType as StructType ?? buildElementSchema(elementType, valueOfArray.Length));
                return nested.Values;
            };
        }
        else if (field.DataType is MapType)
        {
            //TODO
            throw new NotImplementedException();
        }
        else if (field.DataType is StructType)
        {
            converter = raw =>
            {
                if (raw != null)
                {
                    // The field's data type is read at conversion time, not when the converter is built.
                    return new RowImpl(raw, field.DataType as StructType);
                }
                return null;
            };
        }
        else
        {
            converter = raw => raw;
        }

        converters[slot] = converter;
        slot++;
    }

    return converters;
}