Example #1
0
        /// <summary>
        /// Appends the string form of every element of <paramref name="column"/> to the
        /// element at the same index of this column.
        /// </summary>
        /// <param name="column">Column whose elements are converted with <c>ToString()</c> and appended. Must have the same length as this column.</param>
        /// <param name="inPlace">When true this column itself is modified and returned; otherwise a clone is modified and returned.</param>
        /// <returns>The column that holds the concatenated strings.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
        /// <exception cref="ArgumentException">The two columns have different lengths.</exception>
        public override BaseColumn Add(BaseColumn column, bool inPlace = false)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on column.Length.
            if (column is null)
            {
                throw new ArgumentNullException(nameof(column));
            }
            if (Length != column.Length)
            {
                throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
            }

            StringColumn ret = inPlace ? this : Clone();

            // NOTE(review): if column[i] can be null, ToString() throws part-way through the loop;
            // with inPlace == true this column would be left partially modified - confirm the
            // intended handling of null elements.
            for (long i = 0; i < Length; i++)
            {
                ret[i] += column[i].ToString();
            }
            return ret;
        }
        /// <summary>
        /// Returns a clone of this column in which the string form of every element of
        /// <paramref name="column"/> has been appended to the element at the same index.
        /// </summary>
        /// <param name="column">Column whose elements are converted with <c>ToString()</c> and appended. Must have the same length as this column.</param>
        /// <returns>A new column holding the concatenated strings; this column is not modified.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
        /// <exception cref="ArgumentException">The two columns have different lengths.</exception>
        public override BaseColumn Add(BaseColumn column)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on column.Length.
            if (column is null)
            {
                throw new ArgumentNullException(nameof(column));
            }
            if (Length != column.Length)
            {
                throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
            }

            StringColumn ret = Clone();

            // TODO: Using indexing is VERY inefficient here. Each indexer call will find the "right" buffer and then return the value
            // NOTE(review): if column[i] can be null, ToString() throws here - confirm the intended
            // handling of null elements.
            for (long i = 0; i < Length; i++)
            {
                ret[i] += column[i].ToString();
            }
            return ret;
        }
Example #3
0
        /// <summary>
        /// Appends the string form of <paramref name="value"/> to every element of the column.
        /// </summary>
        /// <typeparam name="T">Type of the value to append.</typeparam>
        /// <param name="value">Value converted once with <c>ToString()</c> and appended to each element.</param>
        /// <param name="inPlace">When true this column itself is modified and returned; otherwise a clone is modified and returned.</param>
        /// <returns>The column that holds the concatenated strings.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        public override BaseColumn Add<T>(T value, bool inPlace = false)
        {
            // Validate before cloning so a null value fails fast with a descriptive exception
            // rather than a NullReferenceException from value.ToString().
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }

            StringColumn ret = inPlace ? this : Clone();
            string valString = value.ToString();

            // Walk the underlying buffers directly instead of going through the column indexer.
            for (int i = 0; i < ret._stringBuffers.Count; i++)
            {
                IList<string> buffer = ret._stringBuffers[i];
                int bufferLen = buffer.Count;
                for (int j = 0; j < bufferLen; j++)
                {
                    // C# string concatenation treats a null left operand as empty,
                    // so a null element becomes valString.
                    buffer[j] += valString;
                }
            }
            return ret;
        }
        /// <summary>
        /// Reads a delimited text stream as a DataFrame.
        /// Follows pandas API.
        /// </summary>
        /// <remarks>
        /// The data is read in two passes, so <paramref name="createStream"/> is invoked twice:
        /// the first pass counts lines and samples rows to guess column types, the second pass
        /// fills the columns. Both invocations are expected to yield the same content.
        /// </remarks>
        /// <param name="createStream">function which creates a stream; called once per pass</param>
        /// <param name="separator">column separator</param>
        /// <param name="header">when true the first line is treated as a header and is not loaded as data</param>
        /// <param name="columnNames">column names. When null, names are taken from the header line if there is one;
        /// any column without a name is called "Column" followed by its zero-based index</param>
        /// <param name="dataTypes">column types. NOTE: this argument is currently not consulted by this method;
        /// column types are always guessed from the sampled rows</param>
        /// <param name="numberOfRowsToRead">number of rows to read not including the header(if present); -1 reads everything</param>
        /// <param name="guessRows">number of data rows used to guess types</param>
        /// <param name="addIndexColumn">add one column named "IndexColumn" with the row index as the first column</param>
        /// <returns>DataFrame</returns>
        /// <exception cref="ArgumentNullException"><paramref name="createStream"/> is null.</exception>
        /// <exception cref="FormatException">No data row could be sampled for type guessing.</exception>
        /// <exception cref="NotSupportedException">A guessed column type is not bool, float or string.</exception>
        public static DataFrame ReadStream(Func <StreamReader> createStream,
                                           char separator          = ',', bool header       = true,
                                           string[] columnNames    = null, Type[] dataTypes = null,
                                           long numberOfRowsToRead = -1, int guessRows      = 10, bool addIndexColumn = false)
        {
            if (createStream == null)
            {
                throw new ArgumentNullException(nameof(createStream));
            }

            var  linesForGuessType = new List<string[]>();
            long rowline           = 0;
            int  numberOfColumns   = 0;

            // The limit is compared against physical line numbers, so make room for the header line.
            long lineLimit = numberOfRowsToRead;
            if (header && lineLimit != -1)
            {
                lineLimit++;
            }

            // First pass: schema and number of rows.
            using (StreamReader st = createStream())
            {
                string line = st.ReadLine();
                while (line != null)
                {
                    bool withinLimit = lineLimit == -1 || rowline < lineLimit;
                    if (withinLimit && linesForGuessType.Count < guessRows)
                    {
                        string[] spl = line.Split(separator);
                        if (header && rowline == 0)
                        {
                            // Explicitly supplied names win over the header line.
                            if (columnNames == null)
                            {
                                columnNames = spl;
                            }
                        }
                        else
                        {
                            linesForGuessType.Add(spl);
                            numberOfColumns = Math.Max(numberOfColumns, spl.Length);
                        }
                    }
                    ++rowline;
                    if (rowline == lineLimit)
                    {
                        break;
                    }
                    line = st.ReadLine();
                }
            }

            if (linesForGuessType.Count == 0)
            {
                throw new FormatException(Strings.EmptyFile);
            }

            // rowline now holds the number of lines consumed; the header line is not a data row.
            long rowCount = header ? rowline - 1 : rowline;
            List<BaseColumn> columns = new List<BaseColumn>(numberOfColumns);

            // Guesses types and adds columns.
            for (int i = 0; i < numberOfColumns; ++i)
            {
                // A sampled row may be wider than the list of names; fall back to a generated
                // name instead of failing with an IndexOutOfRangeException.
                string columnName = (columnNames != null && i < columnNames.Length)
                    ? columnNames[i]
                    : "Column" + i.ToString();

                Type kind = GuessKind(i, linesForGuessType);
                if (kind == typeof(bool))
                {
                    columns.Add(new PrimitiveColumn<bool>(columnName, rowCount));
                }
                else if (kind == typeof(float))
                {
                    columns.Add(new PrimitiveColumn<float>(columnName, rowCount));
                }
                else if (kind == typeof(string))
                {
                    columns.Add(new StringColumn(columnName, rowCount));
                }
                else
                {
                    throw new NotSupportedException(nameof(kind));
                }
            }

            // Fills values.
            using (StreamReader st = createStream())
            {
                rowline = 0;
                string line = st.ReadLine();
                while (line != null && (lineLimit == -1 || rowline < lineLimit))
                {
                    bool isHeaderLine = header && rowline == 0;
                    if (!isHeaderLine)
                    {
                        // Data rows are stored at their index excluding the header line.
                        AppendRow(columns, header ? rowline - 1 : rowline, line.Split(separator));
                    }
                    ++rowline;
                    line = st.ReadLine();
                }
            }

            if (addIndexColumn)
            {
                long length = columns[0].Length;
                PrimitiveColumn<int> indexColumn = new PrimitiveColumn<int>("IndexColumn", length);
                for (int i = 0; i < length; i++)
                {
                    indexColumn[i] = i;
                }
                columns.Insert(0, indexColumn);
            }
            return new DataFrame(columns);
        }