/// <summary>
/// Builds a new frame table from comma-separated text.
/// </summary>
/// <param name="reader">A reader positioned at the beginning of the file.</param>
/// <returns>A new data frame with data from the file.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is <see langword="null"/>.</exception>
/// <remarks>
/// <para>The column names are taken from the first line of the file.</para>
/// <para>The storage type of each column is inferred from the types of objects
/// encountered as the frame table is constructed: a column is stored using the first
/// type candidate recorded for it, and a column with no type candidates is kept as text.</para>
/// </remarks>
public static FrameTable FromCsv(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException(nameof(reader));
    }

    // First pass: read every cell as text and collect per-column type information.
    NamedList<string>[] rawColumns;
    DataAdaptor[] adaptors;
    ReadCsvAsStrings(reader, out rawColumns, out adaptors);

    // Second pass: convert each text column into typed storage where a type candidate exists.
    NamedList[] typedColumns = new NamedList[adaptors.Length];
    int position = 0;
    foreach (DataAdaptor adaptor in adaptors)
    {
        NamedList<string> rawColumn = rawColumns[position];
        if (adaptor.TypeCandidates.Count != 0)
        {
            TypeParser parser = adaptor.TypeCandidates.First.Value;
            NamedList typedColumn = parser.CreateStorage(rawColumn.Name, adaptor.IsNullable);
            foreach (string cellText in rawColumn)
            {
                if (cellText != null)
                {
                    object parsed = parser.Parse(cellText);
                    typedColumn.AddItem(parsed);
                }
                else
                {
                    // Null cells are stored as null without being parsed.
                    typedColumn.AddItem(null);
                }
            }
            typedColumns[position] = typedColumn;
        }
        else
        {
            // No type candidate is available, so the column keeps its text form.
            typedColumns[position] = rawColumn;
        }
        position++;
    }

    return new FrameTable(typedColumns);
}
// Exposes the group row dictionary as an iterator of aggregate dictionaries, so that it can be
// fed into the shared dictionary parsing logic. As a side effect of iteration, each group's key
// is appended to the given groups column. This is tightly coupled to the GroupBy method that
// calls it and would ideally be a lambda inside that method, but lambdas cannot be iterators.
// Note the ordering: a group's key is appended only when the consumer resumes the iterator
// after receiving that group's aggregate.
private IEnumerable<IReadOnlyDictionary<string, object>> GetGroupEnumerator(NullableDictionary<object, List<int>> groups, Func<FrameView, IReadOnlyDictionary<string, object>> aggregator, NamedList groupsColumn)
{
    foreach (KeyValuePair<object, List<int>> entry in groups)
    {
        // Present just this group's rows to the aggregator.
        FrameView groupRows = new FrameView(this.columns, entry.Value);
        yield return aggregator(groupRows);

        // Runs on the next MoveNext, i.e. after the caller has consumed the aggregate.
        groupsColumn.AddItem(entry.Key);
    }
}
/// <summary>
/// Groups the rows by the values in the given column and computes a single aggregate quantity for each group.
/// </summary>
/// <typeparam name="T">The type of the aggregate output.</typeparam>
/// <param name="groupByColumnName">The name of the column to group by.</param>
/// <param name="aggregator">A function that computes the aggregate quantity.</param>
/// <param name="aggregateColumnName">The name of the column for the aggregate output.</param>
/// <returns>A new data frame containing the requested aggregate values for each group.</returns>
/// <exception cref="ArgumentNullException"><paramref name="groupByColumnName"/>,
/// <paramref name="aggregator"/>, or <paramref name="aggregateColumnName"/> is <see langword="null"/>.</exception>
/// <remarks>
/// <para>The function that computes the aggregate receives a <see cref="FrameView"/> containing
/// all the rows in the group. To produce an aggregate result, it can use values in any of
/// the columns.</para>
/// <para>The result has two columns: one holding the group keys, named after the group-by column,
/// and one holding the aggregate values.</para>
/// <para>To produce more than one aggregate value, use <see cref="GroupBy(string, Func{FrameView, IReadOnlyDictionary{string, object}})"/>.</para>
/// </remarks>
public FrameTable GroupBy<T>(string groupByColumnName, Func<FrameView, T> aggregator, string aggregateColumnName)
{
    if (groupByColumnName == null)
    {
        throw new ArgumentNullException(nameof(groupByColumnName));
    }
    if (aggregator == null)
    {
        throw new ArgumentNullException(nameof(aggregator));
    }
    if (aggregateColumnName == null)
    {
        throw new ArgumentNullException(nameof(aggregateColumnName));
    }

    // Bucket the row indexes by the value found in the group-by column.
    NamedList sourceColumn = columns[GetColumnIndex(groupByColumnName)];
    NullableDictionary<object, List<int>> rowsByKey = FindGroups(sourceColumn);

    // Output columns: group keys (same storage type as the source column) and aggregates.
    NamedList keyColumn = NamedList.Create(groupByColumnName, sourceColumn.StorageType);
    NamedList<T> resultColumn = new NamedList<T>(aggregateColumnName);

    foreach (KeyValuePair<object, List<int>> entry in rowsByKey)
    {
        // Aggregate first, then record the key, one output row per group.
        FrameView groupRows = new FrameView(this.columns, entry.Value);
        resultColumn.AddItem(aggregator(groupRows));
        keyColumn.AddItem(entry.Key);
    }

    return new FrameTable(keyColumn, resultColumn);
}
/// <summary>
/// Adds a new row of data to the data frame.
/// </summary>
/// <param name="values">A dictionary that maps the existing column names to the cell values for the new row.</param>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is <see langword="null"/>.</exception>
/// <exception cref="KeyNotFoundException"><paramref name="values"/> contains no entry for one of the
/// non-computed columns.</exception>
/// <exception cref="InvalidOperationException">A column reported a row index for the new item that
/// differs from the current row count.</exception>
/// <remarks>
/// <para>Computed columns are skipped; no entry is read from <paramref name="values"/> for them.</para>
/// <para>All values are looked up before any column is modified, so a missing entry leaves
/// the data frame unchanged.</para>
/// </remarks>
public void AddRow(IReadOnlyDictionary<string, object> values)
{
    if (values == null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    // Phase 1: resolve every required value up front. Doing the lookups before any mutation
    // prevents a missing key from leaving earlier columns one item longer than later ones.
    int columnCount = columns.Count;
    object[] rowValues = new object[columnCount];
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
    {
        NamedList column = columns[columnIndex];
        if (column.IsComputed)
        {
            continue;
        }
        object value;
        if (!values.TryGetValue(column.Name, out value))
        {
            throw new KeyNotFoundException("No value was supplied for column '" + column.Name + "'.");
        }
        rowValues[columnIndex] = value;
    }

    // Phase 2: append the resolved values to their columns.
    int rowCount = map.Count;
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
    {
        NamedList column = columns[columnIndex];
        if (column.IsComputed)
        {
            continue;
        }
        int rowIndex = column.AddItem(rowValues[columnIndex]);
        if (rowIndex != rowCount)
        {
            // The column's storage and the row map disagree about the row position.
            throw new InvalidOperationException("Column '" + column.Name + "' stored the new item at an unexpected row index.");
        }
    }

    // Make the new storage row visible through the row map.
    map.Add(rowCount);
}