コード例 #1
0
ファイル: Row.cs プロジェクト: zhangf911/SparkCLR
        /// <summary>
        /// Converts every entry of <c>values</c> in place, based on the data type of the
        /// schema field at the same position.
        /// </summary>
        /// <remarks>
        /// Array values are replaced by the <c>values</c> of a <c>RowImpl</c> built from their
        /// elements, struct values are wrapped in a <c>RowImpl</c>, and string values are
        /// replaced by their <c>ToString()</c> result. Null values are left as null.
        /// Values of any other supported type are kept unchanged.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// A non-null value of an array field is neither an <see cref="ArrayList"/> nor an <c>object[]</c>.
        /// </exception>
        /// <exception cref="NotImplementedException">
        /// The schema contains a map, decimal or date field; these are not supported yet.
        /// </exception>
        private void Initialize()
        {
            // Builds a synthetic schema for an array whose elements are not structs: one field
            // per element, named "_array_0", "_array_1", ..., all sharing the element type.
            // It does not depend on the current field, so it is created once for the whole loop.
            Func<DataType, int, StructType> convertArrayTypeToStructTypeFunc = (dataType, length) =>
            {
                StructField[] fields = new StructField[length];
                for (int i = 0; i < length; i++)
                {
                    fields[i] = new StructField(string.Format("_array_{0}", i), dataType);
                }
                return new StructType(fields);
            };

            int index = 0;

            foreach (var field in schema.Fields)
            {
                if (field.DataType is ArrayType)
                {
                    object rawValue = values[index];

                    // A null array is kept as null, matching how the struct and string
                    // branches below treat null values.
                    if (rawValue != null)
                    {
                        var elementType = ((ArrayType)field.DataType).ElementType;

                        // Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)),
                        // then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So values[index] should be of type ArrayList;
                        // In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[].
                        var listValue = rawValue as ArrayList;
                        object[] valueOfArray = listValue != null ? listValue.ToArray() : rawValue as object[];
                        if (valueOfArray == null)
                        {
                            throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name);
                        }

                        // Struct elements use their own schema; any other element type gets the synthetic one.
                        values[index] = new RowImpl(valueOfArray, elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).values;
                    }
                }
                else if (field.DataType is MapType)
                {
                    //TODO
                    throw new NotImplementedException();
                }
                else if (field.DataType is StructType)
                {
                    if (values[index] != null)
                    {
                        // Nested struct: wrap the raw value in a row that uses the nested schema.
                        values[index] = new RowImpl(values[index], (StructType)field.DataType);
                    }
                }
                else if (field.DataType is DecimalType)
                {
                    //TODO
                    throw new NotImplementedException();
                }
                else if (field.DataType is DateType)
                {
                    //TODO
                    throw new NotImplementedException();
                }
                else if (field.DataType is StringType)
                {
                    if (values[index] != null)
                    {
                        values[index] = values[index].ToString();
                    }
                }

                // Values of every other data type are kept exactly as they are.
                index++;
            }
        }
コード例 #2
0
        /// <summary>
        /// Builds one converter per entry of <c>fields</c>, in field order. Each converter maps a
        /// raw value of that field to the representation used by the row.
        /// </summary>
        /// <remarks>
        /// String fields are converted with <c>ToString()</c>, array fields become the
        /// <c>Values</c> of a <c>RowImpl</c> built from their elements, and struct fields are
        /// wrapped in a <c>RowImpl</c>. Every converter returns null for a null input.
        /// Fields of any other supported type get an identity converter.
        /// </remarks>
        /// <returns>An array of converters whose positions match the positions in <c>fields</c>.</returns>
        /// <exception cref="NotImplementedException">The schema contains a map field; maps are not supported yet.</exception>
        private Func <dynamic, dynamic>[] ConstructPickleConverters()
        {
            // Builds a synthetic schema for an array whose elements are not structs: one field
            // per element, named "_array_0", "_array_1", ..., all sharing the element type.
            // It does not depend on the current field, so it is created once for the whole loop.
            Func <DataType, int, StructType> convertArrayTypeToStructTypeFunc = (dataType, length) =>
            {
                StructField[] f = new StructField[length];
                for (int i = 0; i < length; i++)
                {
                    f[i] = new StructField(string.Format("_array_{0}", i), dataType);
                }
                return new StructType(f);
            };

            var funcs = new Func <dynamic, dynamic> [fields.Count];
            int index = 0;

            foreach (var field in fields)
            {
                // LongType and DateType have no dedicated converter (earlier attempts were
                // disabled) and therefore fall through to the identity converter below.
                if (field.DataType is StringType)
                {
                    funcs[index] = x => x?.ToString();
                }
                else if (field.DataType is ArrayType)
                {
                    var elementType = ((ArrayType)field.DataType).ElementType;
                    funcs[index] = x =>
                    {
                        // A null array is kept as null, matching the string and struct converters.
                        if (x == null)
                        {
                            return null;
                        }

                        // Note: When creating object from json, PySpark converts Json array to Python List (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)),
                        // then Pyrolite unpickler converts Python List to C# ArrayList (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt). So the value should be of type ArrayList;
                        // In case Python changes its implementation, which means value is not of type ArrayList, try cast to object[] because Pyrolite unpickler convert Python Tuple to C# object[].
                        object[] valueOfArray = (x as ArrayList)?.ToArray() ?? x as object[];
                        if (valueOfArray == null)
                        {
                            throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name);
                        }

                        // Struct elements use their own schema; any other element type gets the synthetic one.
                        // TODO: this part may have some problems, not verified
                        return new RowImpl(valueOfArray,
                                           elementType as StructType ?? convertArrayTypeToStructTypeFunc(elementType, valueOfArray.Length)).Values;
                    };
                }
                else if (field.DataType is MapType)
                {
                    //TODO
                    throw new NotImplementedException();
                }
                else if (field.DataType is StructType)
                {
                    // Resolve the nested schema once instead of on every converted value.
                    var structType = (StructType)field.DataType;
                    funcs[index] = x => x != null ? new RowImpl(x, structType) : null;
                }
                else
                {
                    funcs[index] = x => x;
                }
                index++;
            }
            return funcs;
        }