Esempio n. 1
0
        /// <summary>
        /// Builds a <see cref="Row"/> from the current <c>Schema</c> and <c>Values</c>,
        /// then clears the cached schema state.
        /// </summary>
        /// <returns>
        /// A row wrapping the result of <c>GetValues(Values)</c> together with the
        /// schema parsed from <c>Schema</c>.
        /// </returns>
        public Row GetRow()
        {
            // The "as" cast yields null when the parsed type is not a StructType;
            // that null is handed to RowImpl unchanged.
            var parsedSchema = DataType.ParseDataTypeFromJson(Schema) as StructType;
            var rowValues = GetValues(Values);
            var result = new RowImpl(rowValues, parsedSchema);

            // Clear the cached schema so rows coming from several DataFrames can be
            // processed in the same AppDomain. The next row carries its own schema,
            // so nothing is lost by resetting here.
            isCurrentSchemaSet = false;
            currentSchema = null;

            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// Replaces, in place, every non-null entry of <c>values</c> that belongs to a
        /// nested (sub-row) column with a <c>RowImpl</c> built from that entry.
        /// </summary>
        private void Initialize()
        {
            // "position" tracks the slot in values that matches the current schema column.
            int position = -1;

            foreach (var column in schema.columns)
            {
                position++;

                // Only a column whose type declares columns of its own holds a sub-row.
                if (!column.type.columns.Any())
                {
                    continue;
                }

                // Null entries are left as they are.
                object raw = values[position];
                if (raw == null)
                {
                    continue;
                }

                values[position] = new RowImpl(values[position], column.type);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Normalizes the entries of <c>values</c> in place, one per field of
        /// <c>schema.Fields</c>, according to the field's data type.
        /// </summary>
        /// <remarks>
        /// ArrayType entries are replaced by the <c>values</c> of a temporary RowImpl,
        /// non-null StructType entries are wrapped in a RowImpl, and non-null StringType
        /// entries are converted with <c>ToString()</c>. Entries of any other type are
        /// left untouched.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// An ArrayType field holds a value that is neither an ArrayList nor an object[].
        /// </exception>
        /// <exception cref="NotImplementedException">
        /// The schema contains a MapType, DecimalType or DateType field.
        /// </exception>
        private void Initialize()
        {
            int index = 0;

            foreach (var field in schema.Fields)
            {
                if (field.DataType is ArrayType)
                {
                    var elementType = (field.DataType as ArrayType).ElementType;

                    // Note: When creating object from json, PySpark converts a Json array to a Python List
                    // (https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/types.py, _create_cls(dataType)),
                    // then the Pyrolite unpickler converts the Python List to a C# ArrayList
                    // (https://github.com/irmen/Pyrolite/blob/v4.10/README.txt), so values[index] should be an ArrayList.
                    // In case the Python side changes and the value is not an ArrayList, fall back to object[],
                    // because the Pyrolite unpickler converts a Python Tuple to a C# object[].
                    var listValue = values[index] as ArrayList;
                    object[] valueOfArray = listValue != null ? listValue.ToArray() : values[index] as object[];
                    if (valueOfArray == null)
                    {
                        // NOTE(review): a null array value also ends up here and throws - confirm whether
                        // nullable array columns should be passed through instead.
                        throw new ArgumentException("Cannot parse data of ArrayType: " + field.Name);
                    }

                    // Struct elements use their own schema; any other element type gets a synthetic
                    // schema with one field per array element.
                    values[index] = new RowImpl(valueOfArray, elementType as StructType ?? ConvertArrayTypeToStructType(elementType, valueOfArray.Length)).values;
                }
                else if (field.DataType is MapType)
                {
                    //TODO
                    throw new NotImplementedException();
                }
                else if (field.DataType is StructType)
                {
                    // Null sub-rows are left as null; only real values are wrapped.
                    if (values[index] != null)
                    {
                        values[index] = new RowImpl(values[index], field.DataType as StructType);
                    }
                }
                else if (field.DataType is DecimalType)
                {
                    //TODO
                    throw new NotImplementedException();
                }
                else if (field.DataType is DateType)
                {
                    //TODO
                    throw new NotImplementedException();
                }
                else if (field.DataType is StringType)
                {
                    if (values[index] != null)
                    {
                        values[index] = values[index].ToString();
                    }
                }

                // Values of every other data type are kept exactly as delivered.
                index++;
            }
        }

        /// <summary>
        /// Builds a struct type with <paramref name="length"/> fields named
        /// "_array_0", "_array_1", ... that all share <paramref name="elementType"/>.
        /// </summary>
        /// <param name="elementType">Data type assigned to every generated field.</param>
        /// <param name="length">Number of fields to generate.</param>
        /// <returns>The generated struct type.</returns>
        private static StructType ConvertArrayTypeToStructType(DataType elementType, int length)
        {
            StructField[] fields = new StructField[length];
            for (int i = 0; i < length; i++)
            {
                fields[i] = new StructField(string.Format("_array_{0}", i), elementType);
            }

            return new StructType(fields);
        }