/// <summary>
/// Appends the string form of each element of <paramref name="column"/> to the element
/// at the same index of this column.
/// </summary>
/// <param name="column">Column whose elements are appended; must have the same length as this column.</param>
/// <param name="inPlace">When true this column is modified and returned; otherwise a clone is modified and returned.</param>
/// <returns>The column holding the concatenated strings.</returns>
/// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
/// <exception cref="ArgumentException">The two columns differ in length.</exception>
public override DataFrameColumn Add(DataFrameColumn column, bool inPlace = false)
{
    if (column is null)
    {
        throw new ArgumentNullException(nameof(column));
    }

    if (Length != column.Length)
    {
        throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
    }

    StringDataFrameColumn ret = inPlace ? this : Clone();
    for (long i = 0; i < Length; i++)
    {
        // A null element has no string form: skip it instead of dereferencing it,
        // which leaves ret[i] exactly as it was.
        object other = column[i];
        if (other is null)
        {
            continue;
        }

        ret[i] += other.ToString();
    }

    return ret;
}
/// <summary>
/// Appends <paramref name="value"/> to every string stored in the column's buffers.
/// </summary>
/// <param name="value">Text placed after each existing element.</param>
/// <param name="inPlace">When true this column's buffers are rewritten; otherwise a clone's buffers are.</param>
/// <returns>The column whose buffers were rewritten.</returns>
public StringDataFrameColumn Add(string value, bool inPlace = false)
{
    StringDataFrameColumn target = inPlace ? this : Clone();

    for (int bufferIndex = 0; bufferIndex < target._stringBuffers.Count; bufferIndex++)
    {
        IList<string> chunk = target._stringBuffers[bufferIndex];
        int chunkLength = chunk.Count;

        for (int slot = 0; slot < chunkLength; slot++)
        {
            // Existing text first, then the suffix.
            chunk[slot] = string.Concat(chunk[slot], value);
        }
    }

    return target;
}
/// <summary>
/// Builds a clone of <paramref name="right"/> in which every stored string is prefixed
/// with <paramref name="value"/>.
/// </summary>
/// <param name="value">Text placed before each element.</param>
/// <param name="right">Column that is cloned; the clone's buffers are rewritten.</param>
/// <returns>The rewritten clone.</returns>
public static StringDataFrameColumn Add(string value, StringDataFrameColumn right)
{
    StringDataFrameColumn prefixed = right.Clone();

    for (int bufferIndex = 0; bufferIndex < prefixed._stringBuffers.Count; bufferIndex++)
    {
        IList<string> chunk = prefixed._stringBuffers[bufferIndex];
        int chunkLength = chunk.Count;

        for (int slot = 0; slot < chunkLength; slot++)
        {
            // Prefix first, then the existing text.
            chunk[slot] = string.Concat(value, chunk[slot]);
        }
    }

    return prefixed;
}
/// <summary>
/// Creates a column whose element type is <paramref name="kind"/>.
/// </summary>
/// <param name="kind">Element type of the column to create.</param>
/// <param name="columnNames">Optional names; when null the name is "Column" followed by <paramref name="columnIndex"/>.</param>
/// <param name="columnIndex">Index used to pick the name from <paramref name="columnNames"/> or to build the default name.</param>
/// <returns>The newly created column.</returns>
/// <exception cref="NotSupportedException"><paramref name="kind"/> is not one of the handled types.</exception>
private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int columnIndex)
{
    // Evaluated only from a matching branch, so an unsupported kind never touches columnNames.
    string ResolveName()
    {
        if (columnNames == null)
        {
            return "Column" + columnIndex.ToString();
        }

        return columnNames[columnIndex];
    }

    DataFrameColumn Primitive<T>() where T : unmanaged
    {
        return new PrimitiveDataFrameColumn<T>(ResolveName());
    }

    if (kind == typeof(bool))
    {
        return Primitive<bool>();
    }

    if (kind == typeof(int))
    {
        return Primitive<int>();
    }

    if (kind == typeof(float))
    {
        return Primitive<float>();
    }

    if (kind == typeof(string))
    {
        return new StringDataFrameColumn(ResolveName(), 0);
    }

    if (kind == typeof(long))
    {
        return Primitive<long>();
    }

    if (kind == typeof(decimal))
    {
        return Primitive<decimal>();
    }

    if (kind == typeof(byte))
    {
        return Primitive<byte>();
    }

    if (kind == typeof(char))
    {
        return Primitive<char>();
    }

    if (kind == typeof(double))
    {
        return Primitive<double>();
    }

    if (kind == typeof(sbyte))
    {
        return Primitive<sbyte>();
    }

    if (kind == typeof(short))
    {
        return Primitive<short>();
    }

    if (kind == typeof(uint))
    {
        return Primitive<uint>();
    }

    if (kind == typeof(ulong))
    {
        return Primitive<ulong>();
    }

    if (kind == typeof(ushort))
    {
        return Primitive<ushort>();
    }

    throw new NotSupportedException(nameof(kind));
}
/// <summary>
/// Creates the typed column class that corresponds to <paramref name="kind"/>.
/// </summary>
/// <param name="kind">Element type of the column to create.</param>
/// <param name="columnNames">Names forwarded to GetColumnName.</param>
/// <param name="columnIndex">Index forwarded to GetColumnName.</param>
/// <returns>The newly created column.</returns>
/// <exception cref="NotSupportedException"><paramref name="kind"/> is not one of the handled types.</exception>
private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int columnIndex)
{
    // Called only from a matching branch, so GetColumnName is not invoked for an unsupported kind.
    string Name() => GetColumnName(columnNames, columnIndex);

    if (kind == typeof(bool))
    {
        return new BooleanDataFrameColumn(Name());
    }

    if (kind == typeof(int))
    {
        return new Int32DataFrameColumn(Name());
    }

    if (kind == typeof(float))
    {
        return new SingleDataFrameColumn(Name());
    }

    if (kind == typeof(string))
    {
        return new StringDataFrameColumn(Name(), 0);
    }

    if (kind == typeof(long))
    {
        return new Int64DataFrameColumn(Name());
    }

    if (kind == typeof(decimal))
    {
        return new DecimalDataFrameColumn(Name());
    }

    if (kind == typeof(byte))
    {
        return new ByteDataFrameColumn(Name());
    }

    if (kind == typeof(char))
    {
        return new CharDataFrameColumn(Name());
    }

    if (kind == typeof(double))
    {
        return new DoubleDataFrameColumn(Name());
    }

    if (kind == typeof(sbyte))
    {
        return new SByteDataFrameColumn(Name());
    }

    if (kind == typeof(short))
    {
        return new Int16DataFrameColumn(Name());
    }

    if (kind == typeof(uint))
    {
        return new UInt32DataFrameColumn(Name());
    }

    if (kind == typeof(ulong))
    {
        return new UInt64DataFrameColumn(Name());
    }

    if (kind == typeof(ushort))
    {
        return new UInt16DataFrameColumn(Name());
    }

    throw new NotSupportedException(nameof(kind));
}
/// <summary>
/// Reads a seekable stream of CSV data into a DataFrame.
/// Follows pandas API.
/// </summary>
/// <remarks>
/// The stream is read twice starting from its position at the time of the call: once to
/// count rows and collect sample rows for type guessing, and once to fill the columns.
/// The stream is left open.
/// </remarks>
/// <param name="csvStream">stream of CSV data to be read in; must be seekable</param>
/// <param name="separator">column separator</param>
/// <param name="header">has a header or not</param>
/// <param name="columnNames">column names (can be null); when null, names are taken from the header line if there is one, otherwise "Column" followed by the column index is used</param>
/// <param name="dataTypes">column types (can be null). NOTE(review): this argument is not read by the implementation below, column types are always guessed; confirm whether it should be honored.</param>
/// <param name="numberOfRowsToRead">number of rows to read not including the header(if present); -1 reads every row</param>
/// <param name="guessRows">number of rows used to guess types</param>
/// <param name="addIndexColumn">add one column with the row index</param>
/// <returns>DataFrame</returns>
/// <exception cref="ArgumentNullException"><paramref name="csvStream"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="csvStream"/> is not seekable.</exception>
/// <exception cref="FormatException">No data row was collected for type guessing.</exception>
/// <exception cref="NotSupportedException">A guessed column type is not bool, float or string.</exception>
public static DataFrame LoadCsv(Stream csvStream, char separator = ',', bool header = true, string[] columnNames = null, Type[] dataTypes = null, long numberOfRowsToRead = -1, int guessRows = 10, bool addIndexColumn = false)
{
    if (csvStream is null)
    {
        throw new ArgumentNullException(nameof(csvStream));
    }

    if (!csvStream.CanSeek)
    {
        throw new ArgumentException(Strings.NonSeekableStream, nameof(csvStream));
    }

    // Explicit UTF-8 and buffer size match what the null / -1 sentinels select on runtimes
    // that accept them, and are also valid on runtimes that reject those sentinels.
    StreamReader OpenReader()
    {
        return new StreamReader(csvStream, System.Text.Encoding.UTF8, detectEncodingFromByteOrderMarks: true, bufferSize: 1024, leaveOpen: true);
    }

    // Falls back to the generated name when no name exists for this index, e.g. when a data
    // row has more fields than the header line.
    string ColumnName(int index)
    {
        if (columnNames == null || index >= columnNames.Length)
        {
            return "Column" + index.ToString();
        }

        return columnNames[index];
    }

    var linesForGuessType = new List<string[]>();
    long rowline = 0;
    int numberOfColumns = 0;

    // The header takes one physical line, so it is added to the line budget.
    if (header && numberOfRowsToRead != -1)
    {
        numberOfRowsToRead++;
    }

    long streamStart = csvStream.Position;

    // First pass: schema and number of rows.
    using (StreamReader reader = OpenReader())
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            bool withinRowLimit = numberOfRowsToRead == -1 || rowline < numberOfRowsToRead;
            if (withinRowLimit && linesForGuessType.Count < guessRows)
            {
                string[] fields = line.Split(separator);
                if (header && rowline == 0)
                {
                    // Caller-supplied names win over the header line.
                    if (columnNames == null)
                    {
                        columnNames = fields;
                    }
                }
                else
                {
                    linesForGuessType.Add(fields);
                    numberOfColumns = Math.Max(numberOfColumns, fields.Length);
                }
            }

            ++rowline;
            if (rowline == numberOfRowsToRead)
            {
                break;
            }

            line = reader.ReadLine();
        }
    }

    if (linesForGuessType.Count == 0)
    {
        throw new FormatException(Strings.EmptyFile);
    }

    // rowline counted every physical line read, including the header line when present.
    long rowCount = header ? rowline - 1 : rowline;

    // Guesses types and adds columns.
    var columns = new List<DataFrameColumn>(numberOfColumns);
    for (int i = 0; i < numberOfColumns; ++i)
    {
        Type kind = GuessKind(i, linesForGuessType);
        string name = ColumnName(i);

        if (kind == typeof(bool))
        {
            columns.Add(new PrimitiveDataFrameColumn<bool>(name, rowCount));
        }
        else if (kind == typeof(float))
        {
            columns.Add(new PrimitiveDataFrameColumn<float>(name, rowCount));
        }
        else if (kind == typeof(string))
        {
            columns.Add(new StringDataFrameColumn(name, rowCount));
        }
        else
        {
            throw new NotSupportedException(nameof(kind));
        }
    }

    // Second pass: fills values. A fresh reader is opened after rewinding so that
    // byte-order-mark detection runs again from the start position.
    csvStream.Seek(streamStart, SeekOrigin.Begin);
    using (StreamReader reader = OpenReader())
    {
        string line = reader.ReadLine();
        rowline = 0;
        while (line != null && (numberOfRowsToRead == -1 || rowline < numberOfRowsToRead))
        {
            bool isHeaderLine = header && rowline == 0;
            if (!isHeaderLine)
            {
                string[] fields = line.Split(separator);
                AppendRow(columns, header ? rowline - 1 : rowline, fields);
            }

            ++rowline;
            line = reader.ReadLine();
        }
    }

    if (addIndexColumn)
    {
        PrimitiveDataFrameColumn<int> indexColumn = new PrimitiveDataFrameColumn<int>("IndexColumn", columns[0].Length);
        for (int i = 0; i < columns[0].Length; i++)
        {
            indexColumn[i] = i;
        }

        columns.Insert(0, indexColumn);
    }

    return new DataFrame(columns);
}