Esempio n. 1
0
        /*
         * /// <summary>
         * /// Joins this data view to another data view on the given column.
         * /// </summary>
         * /// <param name="other"></param>
         * /// <param name="columnName"></param>
         * /// <returns></returns>
         * public DataFrame Join (DataFrame other, string columnName)
         * {
         *  int thisColumnIndex = this.GetColumnIndex(columnName);
         *  Type thisType = this.columns[thisColumnIndex].StorageType;
         *  int otherColumnIndex = other.GetColumnIndex(columnName);
         *  Type otherType = other.columns[otherColumnIndex].StorageType;
         *
         *  // Form a lookup from the other table
         *  Dictionary<object, int> hash = new Dictionary<object, int>();
         *  for (int otherRowIndex = 0; otherRowIndex < other.Rows.Count; otherRowIndex++)
         *  {
         *      hash[other.columns[otherColumnIndex].GetItem(other.map[otherRowIndex])] = otherRowIndex;
         *  }
         *
         *  // Construct the joined columns
         *  List<DataList> joinedColumns = new List<DataList>();
         *  for (int i = 0; i < this.columns.Count; i++)
         *  {
         *      DataList joinedColumn = DataList.Create(this.columns[i].Name, this.columns[i].StorageType);
         *      joinedColumns.Add(joinedColumn);
         *  }
         *  for (int j = 0; j < other.columns.Count; j++)
         *  {
         *      DataList joinedColumn = DataList.Create(other.columns[j].Name, other.columns[j].StorageType);
         *      joinedColumns.Add(joinedColumn);
         *  }
         *
         *  // Populate the joined columns
         *  for (int thisRowIndex = 0; thisRowIndex < this.map.Count; thisRowIndex++)
         *  {
         *      object thisValue = this.columns[thisColumnIndex].GetItem(this.map[thisRowIndex]);
         *      int otherRowIndex;
         *      if (hash.TryGetValue(thisValue, out otherRowIndex))
         *      {
         *          for (int i = 0; i < this.columns.Count; i++)
         *          {
         *              joinedColumns[i].AddItem(this.columns[i].GetItem(this.map[i]));
         *          }
         *          for (int j = 0; j < other.columns.Count; j++)
         *          {
         *              joinedColumns[this.columns.Count + j].AddItem(other.columns[j].GetItem(other.map[otherRowIndex]));
         *          }
         *      }
         *  }
         *
         *  DataFrame result = new DataFrame(joinedColumns);
         *  return (result);
         *
         * }
         */

        /// <summary>
        /// Appends a column to this frame and records its name in the name-to-index lookup.
        /// </summary>
        /// <param name="column">The column to append.</param>
        /// <exception cref="ArgumentNullException"><paramref name="column"/> is <see langword="null"/>.</exception>
        /// <exception cref="DimensionMismatchException">The frame already has at least one column, and
        /// the new column is not computed and its count differs from the number of entries in the row map.</exception>
        internal void AddColumn(NamedList column)
        {
            if (column == null)
            {
                throw new ArgumentNullException(nameof(column));
            }

            bool isFirstColumn = (this.columns.Count == 0);
            if (isFirstColumn)
            {
                // The first column establishes the row map: one entry per item, numbered 0, 1, 2, ...
                int next = 0;
                while (next < column.Count)
                {
                    this.map.Add(next);
                    next++;
                }
            }
            else if (!(column.IsComputed || column.Count == map.Count))
            {
                // A later, non-computed column must have exactly as many items as the row map has entries.
                throw new DimensionMismatchException();
            }

            // The new column's position is the number of columns present before it is appended.
            int position = this.columns.Count;
            this.columnMap.Add(column.Name, position);
            this.columns.Add(column);
        }
Esempio n. 2
0
 /// <summary>
 /// Initializes a new instance that stores the given column together with the given index list.
 /// </summary>
 /// <param name="column">The column to store; expected to be non-null (checked only by a debug assertion).</param>
 /// <param name="map">The list of integers to store; expected to be non-null (checked only by a debug assertion).</param>
 public ConvertedFrameColumn(NamedList column, List<int> map)
 {
     // Preconditions are verified with debug assertions only; no exception is thrown here.
     Debug.Assert(column != null);
     Debug.Assert(map != null);

     this.map    = map;
     this.column = column;
 }
Esempio n. 3
0
        /// <summary>
        /// Groups the data by the values in the given column, and computes aggregate quantities for each group.
        /// </summary>
        /// <param name="groupByColumnName">The name of the column to group by.</param>
        /// <param name="aggregator">A function that computes the aggregate quantities.</param>
        /// <returns>A new data frame containing the aggregates for each group.</returns>
        /// <remarks>
        /// <para>The first column of the returned <see cref="FrameTable"/> has the same name as the
        /// original <paramref name="groupByColumnName"/> and contains all the distinct
        /// values of that column in the original view. There is an additional column for each
        /// dictionary entry returned by <paramref name="aggregator"/>, whose name is the returned
        /// key and whose values are the values returned for each group.</para>
        /// <para>The function that computes the aggregate receives a <see cref="FrameView"/> containing
        /// all the rows in the group. To produce aggregate results, it can use values in any of
        /// the columns. Each invocation of the <paramref name="aggregator"/> must return the same keys,
        /// and values for the same key must be of the same type. (Values for different keys may be
        /// of different types.) Aggregate column names are taken from the keys and storage types are
        /// inferred from the returned values.</para>
        /// <para>To produce just one aggregate value, you may find it simpler and more efficient
        /// to use the <see cref="GroupBy{T}(string, Func{FrameView, T}, string)"/>
        /// overload.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="groupByColumnName"/> or
        /// <paramref name="aggregator"/> is <see langword="null"/>.</exception>
        public FrameTable GroupBy(string groupByColumnName, Func <FrameView, IReadOnlyDictionary <string, object> > aggregator)
        {
            // Validate up front, matching the single-aggregate overload. Without this, a null aggregator
            // would only fail later, inside the lazily-evaluated group enumerator.
            if (groupByColumnName == null)
            {
                throw new ArgumentNullException(nameof(groupByColumnName));
            }
            if (aggregator == null)
            {
                throw new ArgumentNullException(nameof(aggregator));
            }

            // Collect rows into groups.
            int       groupByColumnIndex = GetColumnIndex(groupByColumnName);
            NamedList groupByColumn      = columns[groupByColumnIndex];
            NullableDictionary <object, List <int> > groups = FindGroups(groupByColumn);

            // Create a column to hold the group values.
            NamedList groupsColumn = NamedList.Create(groupByColumnName, groupByColumn.StorageType);

            // Create an enumerator that feeds the groups into the aggregator and presents them as dictionaries.
            // The enumerator also fills groupsColumn as it is consumed.
            IEnumerable <IReadOnlyDictionary <string, object> > aggregatesEnumerator = GetGroupEnumerator(groups, aggregator, groupsColumn);

            // Column-ify and validate the presented dictionaries. This is what actually drives the enumerator.
            List <NamedList> aggregateColumns = DictionaryHelper.ReadDictionaries(aggregatesEnumerator);

            // Collect the results into a frame table.
            FrameTable result = new FrameTable();

            // First column is the group values.
            result.AddColumn(groupsColumn);
            // Remaining columns are aggregate columns.
            foreach (NamedList aggregateColumn in aggregateColumns)
            {
                result.AddColumn(aggregateColumn);
            }
            return(result);
        }
Esempio n. 4
0
        // Reads delimited text from the reader into one string column per header cell.
        // The first line supplies the column names. Every later line must split into the same number
        // of cells as the first line, otherwise a FormatException is thrown.
        // Null or empty cells are stored as null and mark the corresponding header as nullable;
        // every other cell is shown to the header's DataAdaptor via TryParse and stored verbatim.
        // If the reader has no first line, both out parameters are set to null.
        private static void ReadCsvAsStrings(TextReader reader, out NamedList<string>[] columns, out DataAdaptor[] headers)
        {
            Debug.Assert(reader != null);

            string headerLine = reader.ReadLine();
            if (headerLine == null)
            {
                // Nothing to read at all: report "no columns" as null arrays.
                headers = null;
                columns = null;
                return;
            }

            List<string> names = CsvHelper.ReadCells(headerLine);
            int width = names.Count;

            // One string column and one adaptor per header cell.
            columns = new NamedList<string>[width];
            headers = new DataAdaptor[width];
            for (int c = 0; c < columns.Length; c++)
            {
                columns[c] = new NamedList<string>(names[c]);
                headers[c] = new DataAdaptor();
            }

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                List<string> cells = CsvHelper.ReadCells(line);
                if (cells.Count != width)
                {
                    throw new FormatException();
                }

                for (int c = 0; c < width; c++)
                {
                    string cell = cells[c];
                    if (!String.IsNullOrEmpty(cell))
                    {
                        // Let the adaptor examine the text (presumably to narrow the types the
                        // column can be parsed as -- see DataAdaptor), then keep the raw text.
                        DataAdaptor adaptor = headers[c];
                        adaptor.TryParse(cell);
                        columns[c].Add(cell);
                    }
                    else
                    {
                        // A missing value: remember that this column needs to allow nulls.
                        headers[c].IsNullable = true;
                        columns[c].Add(null);
                    }
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Creates a new frame table from a file of comma-separated values.
        /// </summary>
        /// <param name="reader">A reader positioned at the beginning of the file.</param>
        /// <returns>A new data frame with data from the file. If the reader yields no lines at all,
        /// the returned frame table has no columns.</returns>
        /// <remarks>
        /// <para>The column names are taken from the first line of the file.</para>
        /// <para>The storage type of each column is inferred from the types of objects
        /// encountered as the frame table is constructed.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <see langword="null"/>.</exception>
        /// <exception cref="FormatException">A line after the first does not contain the same number
        /// of cells as the first line.</exception>
        public static FrameTable FromCsv(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }

            NamedList <string>[] textColumns;
            DataAdaptor[]        headers;
            ReadCsvAsStrings(reader, out textColumns, out headers);

            // ReadCsvAsStrings reports a reader with no header line by setting both outputs to null.
            // Return an empty table rather than dereferencing null below.
            if (headers == null || textColumns == null)
            {
                return(new FrameTable());
            }

            NamedList[] columns = new NamedList[headers.Length];
            for (int columnIndex = 0; columnIndex < columns.Length; columnIndex++)
            {
                DataAdaptor header = headers[columnIndex];
                if (header.TypeCandidates.Count == 0)
                {
                    // No candidate type remains, so keep the column as the raw strings.
                    columns[columnIndex] = textColumns[columnIndex];
                }
                else
                {
                    // Use the first remaining candidate to create typed storage and parse every cell into it.
                    TypeParser adaptor = header.TypeCandidates.First.Value;
                    NamedList  column  = adaptor.CreateStorage(textColumns[columnIndex].Name, header.IsNullable);
                    foreach (string textValue in textColumns[columnIndex])
                    {
                        if (textValue == null)
                        {
                            // Missing cells were stored as null by the reader; carry them over as null.
                            column.AddItem(null);
                        }
                        else
                        {
                            object value = adaptor.Parse(textValue);
                            column.AddItem(value);
                        }
                    }
                    columns[columnIndex] = column;
                }
            }

            FrameTable frame = new FrameTable(columns);

            return(frame);
        }
Esempio n. 6
0
        // Buckets the mapped row indexes of this view by the value each one has in the given
        // group-defining column. Each dictionary key is a distinct column value; its list holds the
        // storage indexes (entries of the row map, not positions within the map) that carry that value,
        // in map order. Shared by both GroupBy overloads.
        private NullableDictionary<object, List<int>> FindGroups(NamedList groupByColumn)
        {
            var buckets = new NullableDictionary<object, List<int>>();

            foreach (int storageIndex in this.map)
            {
                object key = groupByColumn.GetItem(storageIndex);

                List<int> bucket;
                if (buckets.TryGetValue(key, out bucket))
                {
                    // Seen this value before: extend its existing bucket.
                    bucket.Add(storageIndex);
                }
                else
                {
                    // First occurrence of this value: start a new bucket for it.
                    bucket = new List<int>();
                    buckets.Add(key, bucket);
                    bucket.Add(storageIndex);
                }
            }

            return buckets;
        }
Esempio n. 7
0
        /// <summary>
        /// Produces a view whose rows are ordered by the values of one column.
        /// </summary>
        /// <param name="columnName">The name of the column whose values determine the order.</param>
        /// <param name="order">The sort direction. <see cref="SortOrder.Ascending"/> sorts ascending;
        /// any other value sorts in the reverse direction.</param>
        /// <returns>A new view over the same columns, with its rows sorted by the given column.</returns>
        /// <remarks>
        /// <para><see langword="null"/> values are supported and are ordered before all other values.</para>
        /// <para>Each value in the column is cast to <see cref="IComparable"/>, so the column's data type
        /// must implement that interface.</para>
        /// <para>This view is not modified; the sort is applied to a copy of its row map.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="columnName"/> is <see langword="null"/>.</exception>
        /// <exception cref="IndexOutOfRangeException"><paramref name="columnName"/> is not the name of a column in the view.</exception>
        /// <exception cref="InvalidCastException">The type of data in the column is not <see cref="IComparable"/>.</exception>
        public FrameView OrderBy(string columnName, SortOrder order)
        {
            if (columnName == null)
            {
                throw new ArgumentNullException(nameof(columnName));
            }

            NamedList sortColumn = columns[GetColumnIndex(columnName)];

            // Pick the comparison once; the reverse direction simply swaps the operands.
            Comparison<int> compareRows;
            if (order == SortOrder.Ascending)
            {
                compareRows = (a, b) => NullableComparer((IComparable)sortColumn.GetItem(a), (IComparable)sortColumn.GetItem(b));
            }
            else
            {
                compareRows = (a, b) => NullableComparer((IComparable)sortColumn.GetItem(b), (IComparable)sortColumn.GetItem(a));
            }

            // Sort a copy of the row map so this view keeps its own ordering.
            List<int> sortedMap = new List<int>(map);
            sortedMap.Sort(compareRows);

            return new FrameView(this.columns, sortedMap);
        }
Esempio n. 8
0
        /// <summary>
        /// Groups the rows by the values of one column and computes a single aggregate value per group.
        /// </summary>
        /// <typeparam name="T">The type of the aggregate output.</typeparam>
        /// <param name="groupByColumnName">The name of the column to group by.</param>
        /// <param name="aggregator">A function that computes the aggregate quantity for one group.</param>
        /// <param name="aggregateColumnName">The name of the column that receives the aggregate output.</param>
        /// <returns>A new frame table with two columns: the group values (named after
        /// <paramref name="groupByColumnName"/>) and the aggregate values (named
        /// <paramref name="aggregateColumnName"/>).</returns>
        /// <remarks>
        /// <para>The aggregator is handed a <see cref="FrameView"/> restricted to the rows of one group
        /// and may read values from any of its columns to produce the result.</para>
        /// <para>To produce more than one aggregate value, use <see cref="GroupBy(string, Func{FrameView, IReadOnlyDictionary{string, object}})"/>.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="groupByColumnName"/>,
        /// <paramref name="aggregator"/>, or <paramref name="aggregateColumnName"/> is <see langword="null"/>.</exception>
        public FrameTable GroupBy<T>(string groupByColumnName, Func<FrameView, T> aggregator, string aggregateColumnName)
        {
            if (groupByColumnName == null)
            {
                throw new ArgumentNullException(nameof(groupByColumnName));
            }
            if (aggregator == null)
            {
                throw new ArgumentNullException(nameof(aggregator));
            }
            if (aggregateColumnName == null)
            {
                throw new ArgumentNullException(nameof(aggregateColumnName));
            }

            // Bucket the rows by their value in the grouping column.
            NamedList keySource = columns[GetColumnIndex(groupByColumnName)];
            NullableDictionary<object, List<int>> buckets = FindGroups(keySource);

            // Output columns: one for the group values (same storage type as the source), one for the aggregates.
            NamedList keys = NamedList.Create(groupByColumnName, keySource.StorageType);
            var aggregates = new NamedList<T>(aggregateColumnName);

            foreach (KeyValuePair<object, List<int>> bucket in buckets)
            {
                // Run the aggregator over a view containing just this group's rows.
                T aggregate = aggregator(new FrameView(this.columns, bucket.Value));
                aggregates.AddItem(aggregate);
                keys.AddItem(bucket.Key);
            }

            return new FrameTable(keys, aggregates);
        }
Esempio n. 9
0
        /// <summary>
        /// Adds a new row of data to the data frame.
        /// </summary>
        /// <param name="values">A dictionary that maps the existing column names to the cell values for the new row.
        /// It must contain an entry for every non-computed column; computed columns are skipped.</param>
        /// <remarks>
        /// <para>All values are looked up before any column is modified, so a dictionary that lacks an
        /// entry for some column leaves the data frame unchanged. A failure raised while a value is being
        /// stored into a column (for example, a value of the wrong type) can still occur after earlier
        /// columns have been extended.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is <see langword="null"/>.</exception>
        /// <exception cref="InvalidOperationException">A column reported a position for the new item that differs
        /// from the current row count.</exception>
        public void AddRow(IReadOnlyDictionary <string, object> values)
        {
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }
            int rowCount = map.Count;

            // Pass 1: fetch every required value up front. If the dictionary's indexer throws for a
            // missing column name, it does so here, before any column has been touched.
            object[] cells = new object[columns.Count];
            for (int columnIndex = 0; columnIndex < columns.Count; columnIndex++)
            {
                NamedList column = columns[columnIndex];
                if (column.IsComputed)
                {
                    continue;
                }
                cells[columnIndex] = values[column.Name];
            }

            // Pass 2: append the fetched values to their columns.
            for (int columnIndex = 0; columnIndex < columns.Count; columnIndex++)
            {
                NamedList column = columns[columnIndex];
                if (column.IsComputed)
                {
                    continue;
                }
                int rowIndex = column.AddItem(cells[columnIndex]);
                // Every stored column is expected to place the new item at the same position.
                if (rowIndex != rowCount)
                {
                    throw new InvalidOperationException();
                }
            }
            map.Add(rowCount);
        }
Esempio n. 10
0
        /// <summary>
        /// Converts a sequence of row dictionaries into a list of typed columns.
        /// </summary>
        /// <param name="dictionaries">The rows to read. The keys of the first dictionary define the column names;
        /// every dictionary must have the same number of entries and is read by those names.</param>
        /// <returns>One column per key of the first dictionary, in the order the keys were enumerated.
        /// If <paramref name="dictionaries"/> yields no rows, the returned list is empty.</returns>
        /// <remarks>
        /// <para>The storage type of each column is the runtime type of the first non-null value seen for it.
        /// If that type is a value type and a null was also seen, the nullable form of the type is used.
        /// A column for which only nulls were seen is stored as a column of <see cref="object"/>.</para>
        /// </remarks>
        /// <exception cref="InvalidOperationException">A dictionary has a different number of entries than the first one.</exception>
        public static List <NamedList> ReadDictionaries(IEnumerable <IReadOnlyDictionary <string, object> > dictionaries)
        {
            Debug.Assert(dictionaries != null);

            // Iterate through the dictionaries, creating header objects that contain the un-cast values
            // and some information about them.
            List <DictionaryColumn> headers = null;

            foreach (IReadOnlyDictionary <string, object> dictionary in dictionaries)
            {
                // From the first row, create the headers list based on key names.
                if (headers == null)
                {
                    headers = new List <DictionaryColumn>(dictionary.Count);
                    foreach (string key in dictionary.Keys)
                    {
                        DictionaryColumn header = new DictionaryColumn()
                        {
                            Name = key, IsNullable = false, Type = null, Data = new List <object>()
                        };
                        headers.Add(header);
                    }
                }

                if (dictionary.Count != headers.Count)
                {
                    throw new InvalidOperationException();
                }

                // For all rows, check for null, record the type if we haven't found it yet, and store the value.
                for (int i = 0; i < headers.Count; i++)
                {
                    DictionaryColumn header = headers[i];
                    object           value  = dictionary[header.Name];
                    if (value == null)
                    {
                        header.IsNullable = true;
                    }
                    else
                    {
                        if (header.Type == null)
                        {
                            header.Type = value.GetType();
                        }
                    }
                    header.Data.Add(value);
                }
            }

            // If there were no rows at all, no headers were ever created. There is nothing to
            // column-ify, so return an empty list instead of dereferencing the null header list.
            if (headers == null)
            {
                return(new List <NamedList>());
            }

            // Arrange the columns into named lists of the appropriate type
            List <NamedList> columns = new List <NamedList>(headers.Count);

            foreach (DictionaryColumn header in headers)
            {
                NamedList column;
                if (header.Type == null)
                {
                    // If no non-null value was ever found, we can't infer a type, so just make an object-column.
                    column = new NamedList <object>(header.Name, header.Data);
                }
                else
                {
                    // Based on null-ability and observed type, create the appropriate storage.
                    Type type = header.Type;
                    if (header.IsNullable && type.GetTypeInfo().IsValueType)
                    {
                        type = typeof(Nullable <>).MakeGenericType(type);
                    }
                    column = NamedList.Create(header.Name, type);
                    // Copy the objects into the storage, which will cast them to the storage type.
                    foreach (object value in header.Data)
                    {
                        column.AddItem(value);
                    }
                }
                columns.Add(column);
            }
            Debug.Assert(columns.Count == headers.Count);

            return(columns);
        }
Esempio n. 11
0
        // Adapts the group dictionary into a lazy sequence of aggregate dictionaries that can be handed
        // to the shared dictionary parser. For each group it builds a view over that group's rows, runs
        // the aggregator on it, and yields the result. As a side effect it also appends each group's key
        // to the supplied groups column. This is tightly coupled to the GroupBy method that calls it; it
        // is a separate method only because a lambda cannot be an iterator.
        // Note: a group's key is appended after its aggregate has been yielded, i.e. when the consumer
        // asks for the next element, so the groups column is complete only once the sequence has been
        // enumerated to the end.
        private IEnumerable<IReadOnlyDictionary<string, object>> GetGroupEnumerator(NullableDictionary<object, List<int>> groups, Func<FrameView, IReadOnlyDictionary<string, object>> aggregator, NamedList groupsColumn)
        {
            foreach (KeyValuePair<object, List<int>> entry in groups)
            {
                List<int> memberRows = entry.Value;
                FrameView groupView = new FrameView(this.columns, memberRows);

                yield return aggregator(groupView);

                // Executed when enumeration resumes after the yield above.
                groupsColumn.AddItem(entry.Key);
            }
        }