/// <summary>
        /// Appends, element by element, the string form of each value in <paramref name="column"/>
        /// to the corresponding string of this column.
        /// </summary>
        /// <param name="column">
        /// Column whose values are converted with <c>ToString()</c> and appended. It must have the
        /// same length as this column.
        /// </param>
        /// <param name="inPlace">
        /// When <c>true</c>, this column is modified and returned; otherwise a clone is modified and returned.
        /// </param>
        /// <returns>The column that holds the concatenated strings.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="column"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">The two columns differ in length.</exception>
        public override DataFrameColumn Add(DataFrameColumn column, bool inPlace = false)
        {
            if (column == null)
            {
                throw new ArgumentNullException(nameof(column));
            }
            if (Length != column.Length)
            {
                throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
            }
            StringDataFrameColumn ret = inPlace ? this : Clone();

            for (long i = 0; i < Length; i++)
            {
                // column[i] may be null; the null-conditional call avoids a NullReferenceException,
                // and string concatenation then treats the null operand as an empty string.
                ret[i] += column[i]?.ToString();
            }
            return ret;
        }
        /// <summary>
        /// Appends <paramref name="value"/> to the end of every string stored in the column.
        /// </summary>
        /// <param name="value">Text concatenated after each existing entry.</param>
        /// <param name="inPlace">
        /// When <c>true</c>, this column is modified and returned; otherwise a clone is modified and returned.
        /// </param>
        /// <returns>The column that holds the suffixed strings.</returns>
        public StringDataFrameColumn Add(string value, bool inPlace = false)
        {
            StringDataFrameColumn result;
            if (inPlace)
            {
                result = this;
            }
            else
            {
                result = Clone();
            }

            // Walk every backing buffer and rewrite each slot with the suffixed text.
            for (int bufferIndex = 0; bufferIndex < result._stringBuffers.Count; ++bufferIndex)
            {
                IList<string> chunk = result._stringBuffers[bufferIndex];
                int chunkSize = chunk.Count;
                for (int slot = 0; slot < chunkSize; ++slot)
                {
                    chunk[slot] = chunk[slot] + value;
                }
            }

            return result;
        }
        /// <summary>
        /// Builds a new column in which <paramref name="value"/> is placed in front of every string
        /// of <paramref name="right"/>.
        /// </summary>
        /// <param name="value">Text concatenated before each entry.</param>
        /// <param name="right">Column providing the entries; it is cloned and the clone is modified.</param>
        /// <returns>The modified clone of <paramref name="right"/>.</returns>
        public static StringDataFrameColumn Add(string value, StringDataFrameColumn right)
        {
            StringDataFrameColumn result = right.Clone();

            // Walk every backing buffer of the clone and rewrite each slot with the prefixed text.
            for (int bufferIndex = 0; bufferIndex < result._stringBuffers.Count; ++bufferIndex)
            {
                IList<string> chunk = result._stringBuffers[bufferIndex];
                int chunkSize = chunk.Count;
                for (int slot = 0; slot < chunkSize; ++slot)
                {
                    string current = chunk[slot];
                    chunk[slot] = value + current;
                }
            }

            return result;
        }
示例#4
0
        /// <summary>
        /// Creates a new column whose element type matches <paramref name="kind"/>.
        /// </summary>
        /// <param name="kind">Element type of the column to create.</param>
        /// <param name="columnNames">
        /// Optional column names. When <c>null</c>, the name is "Column" followed by <paramref name="columnIndex"/>.
        /// </param>
        /// <param name="columnIndex">Position used to pick or generate the column name.</param>
        /// <returns>
        /// A <c>StringDataFrameColumn</c> for <see cref="string"/>, otherwise a
        /// <c>PrimitiveDataFrameColumn</c> of the matching primitive type.
        /// </returns>
        /// <exception cref="NotSupportedException"><paramref name="kind"/> is not one of the handled types.</exception>
        private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int columnIndex)
        {
            // Resolved lazily so that an unsupported kind is rejected before columnNames is indexed.
            string ResolveName()
            {
                if (columnNames == null)
                {
                    return "Column" + columnIndex.ToString();
                }
                return columnNames[columnIndex];
            }

            PrimitiveDataFrameColumn<T> MakePrimitive<T>()
                where T : unmanaged
            {
                return new PrimitiveDataFrameColumn<T>(ResolveName());
            }

            if (kind == typeof(bool))
            {
                return MakePrimitive<bool>();
            }
            if (kind == typeof(int))
            {
                return MakePrimitive<int>();
            }
            if (kind == typeof(float))
            {
                return MakePrimitive<float>();
            }
            if (kind == typeof(string))
            {
                return new StringDataFrameColumn(ResolveName(), 0);
            }
            if (kind == typeof(long))
            {
                return MakePrimitive<long>();
            }
            if (kind == typeof(decimal))
            {
                return MakePrimitive<decimal>();
            }
            if (kind == typeof(byte))
            {
                return MakePrimitive<byte>();
            }
            if (kind == typeof(char))
            {
                return MakePrimitive<char>();
            }
            if (kind == typeof(double))
            {
                return MakePrimitive<double>();
            }
            if (kind == typeof(sbyte))
            {
                return MakePrimitive<sbyte>();
            }
            if (kind == typeof(short))
            {
                return MakePrimitive<short>();
            }
            if (kind == typeof(uint))
            {
                return MakePrimitive<uint>();
            }
            if (kind == typeof(ulong))
            {
                return MakePrimitive<ulong>();
            }
            if (kind == typeof(ushort))
            {
                return MakePrimitive<ushort>();
            }

            throw new NotSupportedException(nameof(kind));
        }
示例#5
0
        /// <summary>
        /// Creates a new, type-specific column for <paramref name="kind"/>, named via
        /// <c>GetColumnName(columnNames, columnIndex)</c>.
        /// </summary>
        /// <param name="kind">Element type of the column to create.</param>
        /// <param name="columnNames">Column names forwarded to <c>GetColumnName</c>.</param>
        /// <param name="columnIndex">Column position forwarded to <c>GetColumnName</c>.</param>
        /// <returns>The concrete column type that corresponds to <paramref name="kind"/>.</returns>
        /// <exception cref="NotSupportedException"><paramref name="kind"/> is not one of the handled types.</exception>
        private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int columnIndex)
        {
            // The name is looked up inside each branch so an unsupported kind never triggers the lookup.
            if (kind == typeof(bool))
            {
                return new BooleanDataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(int))
            {
                return new Int32DataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(float))
            {
                return new SingleDataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(string))
            {
                return new StringDataFrameColumn(GetColumnName(columnNames, columnIndex), 0);
            }
            if (kind == typeof(long))
            {
                return new Int64DataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(decimal))
            {
                return new DecimalDataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(byte))
            {
                return new ByteDataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(char))
            {
                return new CharDataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(double))
            {
                return new DoubleDataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(sbyte))
            {
                return new SByteDataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(short))
            {
                return new Int16DataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(uint))
            {
                return new UInt32DataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(ulong))
            {
                return new UInt64DataFrameColumn(GetColumnName(columnNames, columnIndex));
            }
            if (kind == typeof(ushort))
            {
                return new UInt16DataFrameColumn(GetColumnName(columnNames, columnIndex));
            }

            throw new NotSupportedException(nameof(kind));
        }
示例#6
0
        /// <summary>
        /// Reads a seekable stream of CSV data into a DataFrame.
        /// Follows pandas API.
        /// </summary>
        /// <remarks>
        /// The stream is read twice, starting from its position at the time of the call: the first
        /// pass counts rows and samples lines for type guessing, the second pass fills the columns.
        /// Lines are split on <paramref name="separator"/> only; quoting is not interpreted here.
        /// The stream is left open.
        /// </remarks>
        /// <param name="csvStream">stream of CSV data to be read in; must support seeking</param>
        /// <param name="separator">column separator</param>
        /// <param name="header">has a header or not</param>
        /// <param name="columnNames">
        /// column names (can be null); when null and <paramref name="header"/> is true the header line
        /// supplies the names, and any column without a name is called "Column" followed by its index
        /// </param>
        /// <param name="dataTypes">column types (can be null); currently not consulted, types are always guessed</param>
        /// <param name="numberOfRowsToRead">number of rows to read not including the header(if present); -1 reads everything</param>
        /// <param name="guessRows">number of rows used to guess types</param>
        /// <param name="addIndexColumn">add one column with the row index</param>
        /// <returns>DataFrame</returns>
        /// <exception cref="ArgumentNullException"><paramref name="csvStream"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="csvStream"/> is not seekable.</exception>
        /// <exception cref="FormatException">No data line was collected for type guessing.</exception>
        /// <exception cref="NotSupportedException">A guessed column type is not bool, float or string.</exception>
        public static DataFrame LoadCsv(Stream csvStream,
                                        char separator          = ',', bool header       = true,
                                        string[] columnNames    = null, Type[] dataTypes = null,
                                        long numberOfRowsToRead = -1, int guessRows      = 10, bool addIndexColumn = false)
        {
            if (csvStream == null)
            {
                throw new ArgumentNullException(nameof(csvStream));
            }
            if (!csvStream.CanSeek)
            {
                throw new ArgumentException(Strings.NonSeekableStream, nameof(csvStream));
            }

            var  linesForGuessType = new List<string[]>();
            long rowline           = 0;
            int  numberOfColumns   = 0;

            // The header line is counted as a consumed line, so widen the limit by one.
            if (header && numberOfRowsToRead != -1)
            {
                numberOfRowsToRead++;
            }

            long streamStart = csvStream.Position;

            // First pass: schema and number of rows.
            using (var streamReader = new StreamReader(csvStream, encoding: null, detectEncodingFromByteOrderMarks: true, bufferSize: -1, leaveOpen: true))
            {
                string line = streamReader.ReadLine();
                while (line != null)
                {
                    bool withinLimit = (numberOfRowsToRead == -1) || rowline < numberOfRowsToRead;
                    if (withinLimit && linesForGuessType.Count < guessRows)
                    {
                        string[] fields = line.Split(separator);
                        if (header && rowline == 0)
                        {
                            if (columnNames == null)
                            {
                                columnNames = fields;
                            }
                        }
                        else
                        {
                            linesForGuessType.Add(fields);
                            numberOfColumns = Math.Max(numberOfColumns, fields.Length);
                        }
                    }
                    ++rowline;
                    if (rowline == numberOfRowsToRead)
                    {
                        break;
                    }
                    line = streamReader.ReadLine();
                }
            }

            if (linesForGuessType.Count == 0)
            {
                throw new FormatException(Strings.EmptyFile);
            }

            // Number of data rows: every consumed line except the header.
            long rowCount = header ? rowline - 1 : rowline;
            var  columns  = new List<DataFrameColumn>(numberOfColumns);

            // Guesses types and adds columns.
            for (int i = 0; i < numberOfColumns; ++i)
            {
                Type kind = GuessKind(i, linesForGuessType);

                // Fall back to a generated name when no names exist or the data has more
                // fields than names; indexing past the end of columnNames would throw.
                string columnName = (columnNames != null && i < columnNames.Length)
                    ? columnNames[i]
                    : "Column" + i.ToString();

                if (kind == typeof(bool))
                {
                    columns.Add(new PrimitiveDataFrameColumn<bool>(columnName, rowCount));
                }
                else if (kind == typeof(float))
                {
                    columns.Add(new PrimitiveDataFrameColumn<float>(columnName, rowCount));
                }
                else if (kind == typeof(string))
                {
                    columns.Add(new StringDataFrameColumn(columnName, rowCount));
                }
                else
                {
                    throw new NotSupportedException(nameof(kind));
                }
            }

            // Second pass: fills values. A fresh reader is created after rewinding so that
            // byte-order-mark detection runs again; a reader that was only rewound would decode
            // the BOM as a character at the start of the first field.
            csvStream.Seek(streamStart, SeekOrigin.Begin);
            using (var streamReader = new StreamReader(csvStream, encoding: null, detectEncodingFromByteOrderMarks: true, bufferSize: -1, leaveOpen: true))
            {
                string line = streamReader.ReadLine();
                rowline = 0;
                while (line != null && (numberOfRowsToRead == -1 || rowline < numberOfRowsToRead))
                {
                    // The header line carries no data and is skipped.
                    if (!(header && rowline == 0))
                    {
                        AppendRow(columns, header ? rowline - 1 : rowline, line.Split(separator));
                    }
                    ++rowline;
                    line = streamReader.ReadLine();
                }
            }

            if (addIndexColumn)
            {
                PrimitiveDataFrameColumn<int> indexColumn = new PrimitiveDataFrameColumn<int>("IndexColumn", columns[0].Length);
                for (int i = 0; i < columns[0].Length; i++)
                {
                    indexColumn[i] = i;
                }
                columns.Insert(0, indexColumn);
            }

            return new DataFrame(columns);
        }