예제 #1
0
        /// <summary>
        /// Groups the data by the values in the given column, and computes aggregate quantities for each group.
        /// </summary>
        /// <param name="groupByColumnName">The name of the column to group by.</param>
        /// <param name="aggregator">A function that computes the aggregate quantities.</param>
        /// <returns>A new data frame containing the aggregates for each group.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="groupByColumnName"/> or
        /// <paramref name="aggregator"/> is <see langword="null"/>.</exception>
        /// <remarks>
        /// <para>The first column of the returned <see cref="FrameTable"/> has the same name as the
        /// original <paramref name="groupByColumnName"/> and contains all the distinct
        /// values of that column in the original view. There is an additional column for each
        /// dictionary entry returned by <paramref name="aggregator"/>, whose name is the returned
        /// key and whose values are values returned for each group.</para>
        /// <para>The function that computes the aggregate receives a <see cref="FrameView"/> containing
        /// all the rows in the group. To produce aggregate results, it can use values in any of
        /// the columns. Each invocation of the <paramref name="aggregator"/> must return the same keys
        /// and values for the same keys must be of the same type. (Values for different keys may be
        /// of different types.) Aggregate column names are taken from the keys and storage types are
        /// inferred from the returned values.</para>
        /// <para>To produce just one aggregate value, you may find it simpler and more efficient
        /// to use the <see cref="GroupBy{T}(string, Func{FrameView, T}, string)"/>
        /// overload.</para>
        /// </remarks>
        public FrameTable GroupBy(string groupByColumnName, Func <FrameView, IReadOnlyDictionary <string, object> > aggregator)
        {
            // Validate arguments up front, in the same way as the single-aggregate overload,
            // so that a null argument is reported by name instead of failing somewhere downstream.
            if (groupByColumnName == null)
            {
                throw new ArgumentNullException(nameof(groupByColumnName));
            }
            if (aggregator == null)
            {
                throw new ArgumentNullException(nameof(aggregator));
            }

            // Collect rows into groups.
            int       groupByColumnIndex = GetColumnIndex(groupByColumnName);
            NamedList groupByColumn      = columns[groupByColumnIndex];
            NullableDictionary <object, List <int> > groups = FindGroups(groupByColumn);

            // Create a column to hold the group values; it reuses the name and storage type of the source column.
            NamedList groupsColumn = NamedList.Create(groupByColumnName, groupByColumn.StorageType);

            // Create an enumerator that feeds the groups into the aggregator and presents them as dictionaries.
            IEnumerable <IReadOnlyDictionary <string, object> > aggregatesEnumerator = GetGroupEnumerator(groups, aggregator, groupsColumn);

            // Column-ify and validate the presented dictionaries.
            List <NamedList> aggregateColumns = DictionaryHelper.ReadDictionaries(aggregatesEnumerator);

            // Collect the results into a frame table.
            FrameTable result = new FrameTable();

            // First column is the group values.
            result.AddColumn(groupsColumn);
            // Remaining columns are aggregate columns.
            foreach (NamedList aggregateColumn in aggregateColumns)
            {
                result.AddColumn(aggregateColumn);
            }
            return(result);
        }
예제 #2
0
        /// <summary>
        /// Partitions the rows by the values of one column and evaluates a single aggregate for each partition.
        /// </summary>
        /// <typeparam name="T">The type of value produced by <paramref name="aggregator"/>.</typeparam>
        /// <param name="groupByColumnName">The name of the column whose values define the groups.</param>
        /// <param name="aggregator">A function that computes the aggregate value for one group.</param>
        /// <param name="aggregateColumnName">The name given to the column of aggregate values.</param>
        /// <returns>A new frame table built from a column of group values, named
        /// <paramref name="groupByColumnName"/>, and a column of aggregate values, named
        /// <paramref name="aggregateColumnName"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="groupByColumnName"/>,
        /// <paramref name="aggregator"/>, or <paramref name="aggregateColumnName"/> is
        /// <see langword="null"/>.</exception>
        /// <remarks>
        /// <para>For each group, <paramref name="aggregator"/> is handed a <see cref="FrameView"/> over
        /// the rows belonging to that group, so it may read values from any of the columns when
        /// computing its result.</para>
        /// <para>When more than one aggregate per group is needed, use
        /// <see cref="GroupBy(string, Func{FrameView, IReadOnlyDictionary{string, object}})"/> instead.</para>
        /// </remarks>
        public FrameTable GroupBy <T>(string groupByColumnName, Func <FrameView, T> aggregator, string aggregateColumnName)
        {
            // Reject null arguments, checked in declaration order.
            if (groupByColumnName == null)
            {
                throw new ArgumentNullException(nameof(groupByColumnName));
            }

            if (aggregator == null)
            {
                throw new ArgumentNullException(nameof(aggregator));
            }

            if (aggregateColumnName == null)
            {
                throw new ArgumentNullException(nameof(aggregateColumnName));
            }

            // Locate the grouping column and bucket the row indices by its values.
            NamedList sourceColumn = columns[GetColumnIndex(groupByColumnName)];
            NullableDictionary <object, List <int> > rowsByKey = FindGroups(sourceColumn);

            // Output columns: one for the group keys (same name and storage type as the source column),
            // one for the aggregate computed for each key.
            NamedList     keyColumn    = NamedList.Create(groupByColumnName, sourceColumn.StorageType);
            NamedList <T> resultColumn = new NamedList <T>(aggregateColumnName);

            foreach (KeyValuePair <object, List <int> > entry in rowsByKey)
            {
                // Run the aggregator over a view restricted to this group's rows, then record the key,
                // so both output columns grow in step.
                resultColumn.AddItem(aggregator(new FrameView(this.columns, entry.Value)));
                keyColumn.AddItem(entry.Key);
            }

            return new FrameTable(keyColumn, resultColumn);
        }
예제 #3
0
        /// <summary>
        /// Converts a sequence of row dictionaries into a list of named columns, one per key.
        /// </summary>
        /// <param name="dictionaries">The rows to read. Each dictionary maps column names to the
        /// values for one row. Must not be <see langword="null"/> (checked by a debug assertion only).</param>
        /// <returns>One <see cref="NamedList"/> per key of the first dictionary, in the order in which
        /// that dictionary enumerates its keys. If <paramref name="dictionaries"/> yields no
        /// dictionaries, an empty list is returned.</returns>
        /// <exception cref="InvalidOperationException">A dictionary has a different number of entries
        /// than the first dictionary.</exception>
        /// <remarks>
        /// <para>The column names are fixed by the first dictionary; every later dictionary is looked
        /// up by those names. The storage type of each column is taken from the first non-null value
        /// seen for it. If a column of a value type also contains a null, the corresponding
        /// <see cref="Nullable{T}"/> type is used. If a column contains only nulls, its values are
        /// stored as <see cref="object"/>.</para>
        /// </remarks>
        public static List <NamedList> ReadDictionaries(IEnumerable <IReadOnlyDictionary <string, object> > dictionaries)
        {
            Debug.Assert(dictionaries != null);

            // Iterate through the dictionaries, creating header objects that contain the un-cast values
            // and some information about them.
            List <DictionaryColumn> headers = null;

            foreach (IReadOnlyDictionary <string, object> dictionary in dictionaries)
            {
                // From the first row, create the headers list based on key names.
                if (headers == null)
                {
                    headers = new List <DictionaryColumn>(dictionary.Count);
                    foreach (string key in dictionary.Keys)
                    {
                        DictionaryColumn header = new DictionaryColumn()
                        {
                            Name = key, IsNullable = false, Type = null, Data = new List <object>()
                        };
                        headers.Add(header);
                    }
                }

                // Every row must have the same number of entries as the first one.
                if (dictionary.Count != headers.Count)
                {
                    throw new InvalidOperationException("Each dictionary must contain the same number of entries as the first dictionary.");
                }

                // For all rows, check for null, record the type if we haven't found it yet, and store the value.
                for (int i = 0; i < headers.Count; i++)
                {
                    DictionaryColumn header = headers[i];
                    // Looked up by the name taken from the first row, so a row with the right count
                    // but different keys fails here in the dictionary's indexer.
                    object value = dictionary[header.Name];
                    if (value == null)
                    {
                        header.IsNullable = true;
                    }
                    else if (header.Type == null)
                    {
                        header.Type = value.GetType();
                    }
                    header.Data.Add(value);
                }
            }

            // No rows at all means no headers were ever created; return an empty column list
            // instead of dereferencing the null header list below.
            if (headers == null)
            {
                return new List <NamedList>();
            }

            // Arrange the columns into named lists of the appropriate type
            List <NamedList> columns = new List <NamedList>(headers.Count);

            foreach (DictionaryColumn header in headers)
            {
                NamedList column;
                if (header.Type == null)
                {
                    // If no non-null value was ever found, we can't infer a type, so just make an object-column.
                    column = new NamedList <object>(header.Name, header.Data);
                }
                else
                {
                    // Based on null-ability and observed type, create the appropriate storage.
                    Type type = header.Type;
                    if (header.IsNullable && type.GetTypeInfo().IsValueType)
                    {
                        // A value type cannot hold null, so wrap it in Nullable<>.
                        type = typeof(Nullable <>).MakeGenericType(type);
                    }
                    column = NamedList.Create(header.Name, type);
                    // Copy the objects into the storage, which will cast them to the storage type.
                    foreach (object value in header.Data)
                    {
                        column.AddItem(value);
                    }
                }
                columns.Add(column);
            }
            Debug.Assert(columns.Count == headers.Count);

            return(columns);
        }