/// <summary>
/// Appends the string form of every element of <paramref name="column"/> to the
/// element at the same index of this column.
/// </summary>
/// <param name="column">Column whose elements are converted with ToString() and appended. Must have the same length as this column.</param>
/// <param name="inPlace">When true, this column is modified and returned; otherwise a clone is modified and returned.</param>
/// <returns>The column that holds the concatenated strings.</returns>
/// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
/// <exception cref="ArgumentException">The two columns have different lengths.</exception>
public override BaseColumn Add(BaseColumn column, bool inPlace = false)
{
    if (column == null)
    {
        throw new ArgumentNullException(nameof(column));
    }

    if (Length != column.Length)
    {
        throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
    }

    StringColumn ret = inPlace ? this : Clone();
    for (long i = 0; i < Length; i++)
    {
        // A null element must not crash the whole operation: "?." yields null,
        // and C# string concatenation treats a null operand as an empty string.
        ret[i] += column[i]?.ToString();
    }

    return ret;
}
/// <summary>
/// Returns a clone of this column in which the string form of every element of
/// <paramref name="column"/> has been appended to the element at the same index.
/// </summary>
/// <param name="column">Column whose elements are converted with ToString() and appended. Must have the same length as this column.</param>
/// <returns>A new column holding the concatenated strings; this column is not modified.</returns>
/// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
/// <exception cref="ArgumentException">The two columns have different lengths.</exception>
public override BaseColumn Add(BaseColumn column)
{
    // TODO: Using indexing is VERY inefficient here. Each indexer call will find
    // the "right" buffer and then return the value.
    if (column == null)
    {
        throw new ArgumentNullException(nameof(column));
    }

    if (Length != column.Length)
    {
        throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
    }

    StringColumn ret = Clone();
    for (long i = 0; i < Length; i++)
    {
        // A null element must not crash the whole operation: "?." yields null,
        // and C# string concatenation treats a null operand as an empty string.
        ret[i] += column[i]?.ToString();
    }

    return ret;
}
/// <summary>
/// Appends the string form of <paramref name="value"/> to every element of the column.
/// </summary>
/// <typeparam name="T">Type of the value to append.</typeparam>
/// <param name="value">Value converted once with ToString() and appended to each element. A null value is treated like an empty string.</param>
/// <param name="inPlace">When true, this column is modified and returned; otherwise a clone is modified and returned.</param>
/// <returns>The column that holds the concatenated strings.</returns>
public override BaseColumn Add<T>(T value, bool inPlace = false)
{
    StringColumn ret = inPlace ? this : Clone();

    // T is unconstrained, so value can be null (reference or nullable type).
    // "?." avoids the NullReferenceException; concatenating a null string
    // behaves like concatenating an empty one.
    string valString = value?.ToString();

    // Walk the underlying buffers directly instead of going through the column indexer.
    for (int i = 0; i < ret._stringBuffers.Count; i++)
    {
        IList<string> buffer = ret._stringBuffers[i];
        int bufferLen = buffer.Count;
        for (int j = 0; j < bufferLen; j++)
        {
            buffer[j] += valString;
        }
    }

    return ret;
}
/// <summary>
/// Reads a text file as a DataFrame.
/// Follows pandas API.
/// </summary>
/// <remarks>
/// The input is read in two passes, so <paramref name="createStream"/> is invoked twice:
/// the first pass collects sample rows for type guessing and counts the rows, the second
/// pass fills the columns. Each reader is disposed at the end of its pass.
/// </remarks>
/// <param name="createStream">function which creates a stream</param>
/// <param name="separator">column separator</param>
/// <param name="header">has a header or not</param>
/// <param name="columnNames">column names (can be empty); when null and a header is present, the header fields are used. Columns without a corresponding name are called "Column" followed by their index.</param>
/// <param name="dataTypes">column types (can be empty). NOTE: this implementation never reads this argument; column types are always guessed from the sample rows.</param>
/// <param name="numberOfRowsToRead">number of rows to read not including the header(if present); -1 reads every row</param>
/// <param name="guessRows">number of rows used to guess types</param>
/// <param name="addIndexColumn">add one column with the row index</param>
/// <returns>DataFrame</returns>
/// <exception cref="ArgumentNullException"><paramref name="createStream"/> is null.</exception>
/// <exception cref="FormatException">No data row was collected for type guessing.</exception>
/// <exception cref="NotSupportedException">The guessed type of a column is not bool, float or string.</exception>
public static DataFrame ReadStream(Func<StreamReader> createStream, char separator = ',', bool header = true, string[] columnNames = null, Type[] dataTypes = null, long numberOfRowsToRead = -1, int guessRows = 10, bool addIndexColumn = false)
{
    if (createStream == null)
    {
        throw new ArgumentNullException(nameof(createStream));
    }

    var linesForGuessType = new List<string[]>();
    long rowline = 0;
    int numberOfColumns = 0;

    // The header line counts against the limit, so make room for it.
    if (header == true && numberOfRowsToRead != -1)
    {
        numberOfRowsToRead++;
    }

    // First pass: schema and number of rows.
    using (StreamReader st = createStream())
    {
        string line = st.ReadLine();
        while (line != null)
        {
            if ((numberOfRowsToRead == -1) || rowline < numberOfRowsToRead)
            {
                // Only the first guessRows data rows are split and kept as samples.
                if (linesForGuessType.Count < guessRows)
                {
                    string[] spl = line.Split(separator);
                    if (header && rowline == 0)
                    {
                        // Names supplied by the caller take precedence over the header line.
                        if (columnNames == null)
                        {
                            columnNames = spl;
                        }
                    }
                    else
                    {
                        linesForGuessType.Add(spl);
                        numberOfColumns = Math.Max(numberOfColumns, spl.Length);
                    }
                }
            }

            ++rowline;
            if (rowline == numberOfRowsToRead)
            {
                break;
            }

            line = st.ReadLine();
        }
    }

    if (linesForGuessType.Count == 0)
    {
        throw new FormatException(Strings.EmptyFile);
    }

    // Number of data rows: lines seen in the first pass minus the header line, if any.
    long rowCount = header == true ? rowline - 1 : rowline;

    List<BaseColumn> columns = new List<BaseColumn>(numberOfColumns);

    // Guesses types and adds columns.
    for (int i = 0; i < numberOfColumns; ++i)
    {
        // A data row may have more fields than there are names (short header or short
        // columnNames array); fall back to the generated name instead of indexing out of range.
        string columnName = (columnNames != null && i < columnNames.Length)
            ? columnNames[i]
            : "Column" + i.ToString();

        Type kind = GuessKind(i, linesForGuessType);
        BaseColumn column;
        if (kind == typeof(bool))
        {
            column = new PrimitiveColumn<bool>(columnName, rowCount);
        }
        else if (kind == typeof(float))
        {
            column = new PrimitiveColumn<float>(columnName, rowCount);
        }
        else if (kind == typeof(string))
        {
            column = new StringColumn(columnName, rowCount);
        }
        else
        {
            throw new NotSupportedException(nameof(kind));
        }

        columns.Add(column);
    }

    // Fills values.
    using (StreamReader st = createStream())
    {
        string line = st.ReadLine();
        rowline = 0;
        while (line != null && (numberOfRowsToRead == -1 || rowline < numberOfRowsToRead))
        {
            bool isHeaderLine = header && rowline == 0;
            if (!isHeaderLine)
            {
                // Row index inside the columns excludes the header line.
                AppendRow(columns, header == true ? rowline - 1 : rowline, line.Split(separator));
            }

            ++rowline;
            line = st.ReadLine();
        }
    }

    if (addIndexColumn)
    {
        PrimitiveColumn<int> indexColumn = new PrimitiveColumn<int>("IndexColumn", columns[0].Length);
        for (int i = 0; i < columns[0].Length; i++)
        {
            indexColumn[i] = i;
        }

        columns.Insert(0, indexColumn);
    }

    return new DataFrame(columns);
}