Beispiel #1
0
        /// <summary>
        /// Sort a mutable data table in place by the given column.
        /// Rows move as a unit: every other column is permuted the same way as the sort column.
        /// </summary>
        /// <param name="dt">data table to sort</param>
        /// <param name="columnName">column name to sort on. Throws if missing</param>
        /// <param name="comparer">Comparer used to order the values of the sort column</param>
        /// <exception cref="ArgumentNullException">dt or comparer is null</exception>
        public static void Sort(this MutableDataTable dt, string columnName, IComparer <string> comparer)
        {
            if (dt == null)
            {
                throw new ArgumentNullException("dt");
            }
            if (comparer == null)
            {
                throw new ArgumentNullException("comparer");
            }
            var column = dt.GetColumn(columnName, throwOnMissing: true);

            int len = column.Values.Length;

            // After the sort, map[i] holds the original row index of the row that lands at position i.
            int[] map = new int[len];
            for (int i = 0; i < len; i++)
            {
                map[i] = i;
            }

            // Sort a copy of the keys rather than the live array: if the comparer throws,
            // Array.Sort may leave its input partially reordered, which would desynchronize
            // the sort column from the other columns.
            string[] sortedKeys = (string[])column.Values.Clone();
            Array.Sort(sortedKeys, map, comparer);

            // Phase 1: compute the reordered values for every column without touching the table.
            var columns = dt.Columns;
            string[][] reordered = new string[columns.Length][];
            int slot = 0;
            foreach (var c in columns)
            {
                if (c == column)
                {
                    reordered[slot++] = sortedKeys;
                    continue;
                }

                string[] newVals = new string[len];
                for (int i = 0; i < len; i++)
                {
                    newVals[i] = c.Values[map[i]];
                }
                reordered[slot++] = newVals;
            }

            // Phase 2: commit. Nothing above modified the table, so a failure while sorting
            // or reading values leaves the original row order intact.
            slot = 0;
            foreach (var c in columns)
            {
                c.Values = reordered[slot++];
            }
        }
Beispiel #2
0
        // $$$ Clarify - multiple joins (inner, outer, etc)

        /// <summary>
        /// Performs a full outer join on two in-memory tables and returns a new table.
        /// Each distinct join key produces one output row: keys found in both tables are merged into
        /// a single row, keys found in only one table leave the other table's cells empty.
        /// Rows for keys in <paramref name="d1"/> come first, followed by rows for keys only in <paramref name="d2"/>.
        /// The number of columns in the new table is the sum of columns in the source tables minus 1
        /// (since the join column is redundant).
        /// The result is built by writing a temporary CSV file and reading it back; the file is
        /// deleted before returning, even if the join fails part-way.
        /// </summary>
        /// <param name="d1">first table; its columns come first in the result</param>
        /// <param name="d2">second table; its columns follow those of the first table</param>
        /// <param name="columnName">column name to join on. Both tables must have this column name.</param>
        /// <returns>a new table</returns>
        /// <exception cref="ArgumentNullException">d1 or d2 is null</exception>
        /// <exception cref="InvalidOperationException">either table lacks the join column</exception>
        public static MutableDataTable Join(MutableDataTable d1, MutableDataTable d2, string columnName)
        {
            if (d1 == null)
            {
                throw new ArgumentNullException("d1");
            }
            if (d2 == null)
            {
                throw new ArgumentNullException("d2");
            }

            Column c1 = d1.GetColumn(columnName);

            if (c1 == null)
            {
                throw new InvalidOperationException("Missing column");
            }
            Column c2 = d2.GetColumn(columnName);

            if (c2 == null)
            {
                throw new InvalidOperationException("Missing column");
            }

            // Place d1 in first set of columns, and d2 in second set.
            int kColumn       = d1.Columns.Length;
            int kTotalColumns = kColumn + d2.Columns.Length;

            // Indices into new table where join columns are.
            int joinColumn1 = Utility.GetColumnIndexFromName(d1.ColumnNames, columnName);
            int joinColumn2 = Utility.GetColumnIndexFromName(d2.ColumnNames, columnName) + kColumn;

            // $$$ could really optimize. Sort both on column and then zip.
            // NOTE(review): presumably maps each join-column value to a row index; how duplicate
            // values within one table are handled depends on GetRowIndex - confirm.
            Dictionary <string, int> m1 = GetRowIndex(c1);
            Dictionary <string, int> m2 = GetRowIndex(c2);

            // $$$ column names may not be unique.

            string[] headers = new string[kTotalColumns];
            Array.Copy(d1.ColumnNames.ToArray(), 0, headers, 0, kColumn);
            Array.Copy(d2.ColumnNames.ToArray(), 0, headers, kColumn, kTotalColumns - kColumn);

            // Scratch row reused for every output line.
            string[] values = new string[headers.Length];

            string path = GetTempFileName();

            MutableDataTable t;
            try
            {
                using (CsvWriter tw = new CsvWriter(path, headers))
                {
                    foreach (var kv in m1)
                    {
                        Clear(values);

                        string key = kv.Key; // join column
                        int    r1  = kv.Value;
                        int    r2;
                        if (m2.TryGetValue(key, out r2))
                        {
                            // In both: fill in the d2 half of the row.
                            CopyRowIntoArray(values, kColumn, d2, r2);

                            // Drop it from m2 so only keys unique to d2 remain for the second pass.
                            m2.Remove(key);
                        }

                        // Otherwise the key is only in d1 and the d2 half stays cleared.
                        CopyRowIntoArray(values, 0, d1, r1);
                        values[joinColumn1] = values[joinColumn2] = key;

                        tw.WriteRow(values);
                    }

                    // All keys shared with d1 were removed from m2 above, so m2 now holds only
                    // the keys unique to d2 (possibly none). Tag those onto the end.
                    foreach (var kv in m2)
                    {
                        int r2 = kv.Value;
                        Clear(values);
                        CopyRowIntoArray(values, kColumn, d2, r2);
                        values[joinColumn1] = values[joinColumn2] = kv.Key;

                        tw.WriteRow(values);
                    }
                } // close tw so the file is flushed before it is read back

                t = Reader.ReadCSV(path);
            }
            finally
            {
                // Always remove the temp file, including when writing or reading throws.
                // NOTE(review): assumes DeleteLocalFile tolerates a file that was never created - confirm.
                DeleteLocalFile(path);
            }

            // Remove duplicate columns: both join columns hold the same key, keep the one from d1.
            t.DeleteColumn(joinColumn2);

            return(t);
        }