protected override void Initialize()
        {
            base.Initialize();

            // Seed the initial shape from the other variables of the data set:
            // for every dimension of this variable, take the largest length that
            // any other variable reports for a dimension with the same name.
            int[] shape = new int[Rank];
            var latestSchema = DataSet.GetSchema(SchemaVersion.Recent);
            ReadOnlyDimensionList ownDimensions = this.Dimensions;

            foreach (var other in latestSchema.Variables)
            {
                // The global metadata variable and this variable itself do not contribute.
                bool contributes = other.ID != DataSet.GlobalMetadataVariableID && other.ID != ID;
                if (!contributes)
                {
                    continue;
                }

                foreach (var dimension in other.Dimensions)
                {
                    // Only dimensions shared (by name) with this variable matter.
                    if (!ownDimensions.Contains(dimension.Name))
                    {
                        continue;
                    }

                    int axis = ownDimensions.FindIndex(dimension.Name);
                    if (dimension.Length > shape[axis])
                    {
                        shape[axis] = dimension.Length;
                    }
                }
            }

            // Record the proposed shape; the affected rectangle starts at the
            // all-zero origin and has the proposed shape as its second argument.
            changes.Shape             = shape;
            changes.AffectedRectangle = new Rectangle(new int[Rank], shape);
        }
Beispiel #2
0
        /// <summary>
        /// Builds a <see cref="SerializableDataSetSchema"/> from the schema and metadata of a data set.
        /// </summary>
        /// <param name="ds">Data set whose schema and metadata are converted.</param>
        /// <returns>
        /// A schema object built from the converted dimensions, the converted variables
        /// (minus the filtered-out entry described below) and the data set metadata dictionary.
        /// </returns>
        public static SerializableDataSetSchema GetSerializableSchema(this DataSet ds)
        {
            var schema = ds.GetSchema();
            Type emptyType = typeof(EmptyValueType);

            SerializableDimension[] dimensions = schema.GetDimensions()
                .Select <Dimension, SerializableDimension>(dim => dim.AsSerializble())
                .ToArray();

            // Global metadata is kept out of the variable list: an entry is dropped
            // only when it has BOTH the EmptyValueType data type AND the ID 0.
            SerializableVariableSchema[] variables = schema.Variables
                .Where(variable => !(variable.TypeOfData == emptyType && variable.ID == 0))
                .Select <VariableSchema, SerializableVariableSchema>(variable => variable.AsSerializable())
                .ToArray();

            return new SerializableDataSetSchema(dimensions, variables, ds.Metadata.AsDictionary());
        }
Beispiel #3
0
        /// <summary>
        /// Copies the given dataset into another dataset.
        /// </summary>
        /// <param name="src">Original dataset to copy.</param>
        /// <param name="dst">Destination dataset; must not be read-only.</param>
        /// <param name="updater">
        /// Delegate accepting progress update notifications (a percentage and a message).
        /// May be <c>null</c>, in which case no notifications are sent.
        /// </param>
        /// <returns>The <paramref name="dst"/> instance that was passed in, now containing the copied data.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="src"/> or <paramref name="dst"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// <paramref name="dst"/> is read-only, or the per-iteration block cannot be shrunk
        /// enough to fit into the hard-coded memory budget.
        /// </exception>
        /// <remarks>
        /// This method splits the original dataset into parts and is therefore able
        /// to clone very large datasets that do not fit into memory.
        /// Autocommit of <paramref name="dst"/> is switched off for the duration of the copy
        /// and restored to its previous value afterwards, even if an exception is thrown.
        /// </remarks>
        public static DataSet Clone(DataSet src, DataSet dst, ProgressUpdater updater)
        {
            if (src == null) throw new ArgumentNullException("src");
            if (dst == null) throw new ArgumentNullException("dst");
            if (dst.IsReadOnly)
                throw new NotSupportedException("Destination DataSet is read-only");

            // Maximum memory capacity in bytes (200 * 1024 * 1024) allowed for the
            // estimated size of the data copied in a single iteration.
            ulong N = 200 * 1024 * 1024;
            // Estimated size of a single string in bytes (passed to SizeOf below)
            int sizeofString = 100 * 1024;

            /***********************************************************************************
             * Preparing output
            ***********************************************************************************/
            // Remember the autocommit setting so that the finally block can restore it.
            bool isAutoCommit = dst.IsAutocommitEnabled;
            try
            {
                dst.IsAutocommitEnabled = false;

                DataSetSchema srcSchema = src.GetSchema();
                // Maps the ID of a source variable to the ID of its counterpart created in dst.
                Dictionary<int, int> IDs = new Dictionary<int, int>();

                // Creating empty variables and copying global metadata and scalar variables
                if (updater != null)
                    updater(0, "Creating structure and copying global metadata and scalar variables...");
                VariableSchema globalMetadataVar = null;
                foreach (VariableSchema v in srcSchema.Variables)
                {
                    // The global metadata variable is not re-created; its attributes
                    // are copied into dst.Metadata after this loop.
                    if (v.ID == DataSet.GlobalMetadataVariableID)
                    {
                        globalMetadataVar = v;
                        continue;
                    }

                    Variable t = dst.AddVariable(v.TypeOfData, v.Name, null, v.Dimensions.AsNamesArray());
                    IDs.Add(v.ID, t.ID);

                    foreach (var attr in v.Metadata)
                        t.Metadata[attr.Key] = attr.Value;

                    // Scalars are copied right away; only variables with Rank > 0
                    // go through the chunked copy below.
                    if (t.Rank == 0) // scalar
                        t.PutData(src.Variables.GetByID(v.ID).GetData());
                }
                if (globalMetadataVar != null)
                {
                    // Copying global metadata
                    foreach (var attr in globalMetadataVar.Metadata)
                        dst.Metadata[attr.Key] = attr.Value;
                }
                dst.Commit();
                // Console.Out.WriteLine("Done.\n");
                /***********************************************************************************
                 * Adjusting dimensions deltas
                ***********************************************************************************/
                // deltas[name] is the block length used along dimension "name" in one iteration.
                // It starts at the full dimension length and is reduced below until the
                // estimated size of one iteration fits into N.
                Dimension[] srcDims = srcSchema.GetDimensions();
                Dictionary<string, int> deltas = new Dictionary<string, int>(srcDims.Length);
                foreach (var d in srcDims)
                    deltas[d.Name] = d.Length;

                // Console.Out.WriteLine("Total memory capacity: " + (N / 1024.0 / 1024.0).ToString("F2") + " Mb");
                ulong totalSize;
                do
                {
                    // Estimate the total size of one block of every non-scalar variable.
                    totalSize = 0;
                    foreach (var var in srcSchema.Variables)
                    {
                        if (var.Rank == 0) continue; // scalar
                        // SizeOf is defined elsewhere; presumably the per-element size in bytes,
                        // with sizeofString used for string data — TODO confirm.
                        int typeSize = SizeOf(var.TypeOfData, sizeofString);

                        // Element count of one block = product of the deltas of the variable's dimensions.
                        // count == 0 doubles as the "not set yet" marker.
                        // NOTE(review): a zero delta therefore does not zero the product; the next
                        // dimension's delta simply replaces it — confirm this is intended.
                        ulong count = 0;
                        foreach (var vdim in var.Dimensions)
                        {
                            int dimDelta = deltas[vdim.Name];
                            if (count == 0) count = (ulong)dimDelta;
                            else count *= (ulong)dimDelta;
                        }
                        totalSize += (ulong)typeSize * count;
                    }
                    if (totalSize > N)
                    {
                        // Over budget: halve the currently largest delta and re-estimate.
                        string maxDim = null;
                        int max = int.MinValue;
                        foreach (var dim in deltas)
                            if (dim.Value > max)
                            {
                                max = dim.Value;
                                maxDim = dim.Key;
                            }
                        // Nothing left to halve: every delta is already 1 or less (or there are no dimensions).
                        if (maxDim == null || max <= 1)
                            throw new NotSupportedException("Cannot copy the DataSet: it is too large to be copied entirely by the utility for the provided memory capacity");
                        deltas[maxDim] = max >> 1;
                    }
                } while (totalSize > N);

                // Printing deltas
                // NOTE(review): String.Format is called with an already concatenated string
                // and no format arguments, so it adds nothing over the plain concatenation.
                if (updater != null) updater(0, String.Format("Deltas for the dimensions adjusted (max iteration capacity: " + (totalSize / 1024.0 / 1024.0).ToString("F2") + " Mb)"));

                /***********************************************************************************
                 * Copying data
                ***********************************************************************************/
                // Console.WriteLine();
                if (updater != null) updater(0, "Copying data ...");
                // Per-variable state, keyed by source variable ID:
                //   origins - origin of the block most recently considered for the variable;
                //   shapes  - the variable's unclipped block shape, taken from deltas.
                Dictionary<int, int[]> origins = new Dictionary<int, int[]>(srcSchema.Variables.Length);
                Dictionary<int, int[]> shapes = new Dictionary<int, int[]>(srcSchema.Variables.Length);
                // Variables still to be copied: non-scalar and not the global metadata variable.
                List<VariableSchema> copyVars = srcSchema.Variables.Where(vs =>
                    (vs.Rank > 0 && vs.ID != DataSet.GlobalMetadataVariableID)).ToList();

                // Current block origin along every dimension; all start at 0.
                Dictionary<string, int> dimOrigin = new Dictionary<string, int>(srcDims.Length);
                foreach (var d in srcDims)
                    dimOrigin[d.Name] = 0;

                // Sort dimensions by ascending length: the origin update at the end of each
                // iteration walks this array from the first (shortest) dimension onwards.
                Array.Sort(srcDims, (d1, d2) => d1.Length - d2.Length);
                int totalDims = srcDims.Length;

                do
                {
                    // for each variable:
                    // (iterating backwards so that RemoveAt below does not disturb
                    // the indices that are still to be visited)
                    for (int varIndex = copyVars.Count; --varIndex >= 0; )
                    {
                        VariableSchema var = copyVars[varIndex];
                        // Set when the variable's block origin differs from the one handled
                        // last time (or when the variable is seen for the first time).
                        bool hasChanged = false;
                        // Getting its origin
                        int[] origin;
                        if (!origins.TryGetValue(var.ID, out origin))
                        {
                            origin = new int[var.Rank];
                            origins[var.ID] = origin;
                            hasChanged = true;
                        }
                        // Getting its shape
                        int[] shape;
                        if (!shapes.TryGetValue(var.ID, out shape))
                        {
                            shape = new int[var.Rank];
                            for (int i = 0; i < var.Dimensions.Count; i++)
                                shape[i] = deltas[var.Dimensions[i].Name];
                            shapes.Add(var.ID, shape);
                        }

                        // Updating origin for the variable:
                        // synchronize it with the current per-dimension origins and detect
                        // whether any of this variable's dimensions actually moved.
                        if (!hasChanged)
                            for (int i = 0; i < shape.Length; i++)
                            {
                                int o = dimOrigin[var.Dimensions[i].Name];
                                if (origin[i] != o)
                                {
                                    hasChanged = true;
                                    origin[i] = o;
                                }
                            }
                        if (!hasChanged) // this block is already copied
                            continue;

                        // Clip the block to the variable's bounds along each dimension.
                        bool doCopy = false;
                        bool shapeUpdated = false;
                        for (int i = 0; i < shape.Length; i++)
                        {
                            int s = origin[i] + shape[i];
                            int len = var.Dimensions[i].Length;
                            if (s > len)
                            {
                                // Clip a copy so that the array cached in "shapes" keeps
                                // the unclipped shape for later iterations.
                                if (!shapeUpdated)
                                {
                                    shapeUpdated = true;
                                    shape = (int[])shape.Clone();
                                }
                                shape[i] = len - origin[i];
                            }
                            // The block is copied if it has a positive extent along at least one dimension.
                            if (shape[i] > 0) doCopy = true;
                        }

                        if (doCopy)
                        {
                            Array data = src.Variables.GetByID(var.ID).GetData(origin, shape);
                            // Compute real size here for strings
                            dst.Variables.GetByID(IDs[var.ID]).PutData(origin, data);
                        }
                        else // variable is copied
                        {
                            copyVars.RemoveAt(varIndex);
                        }
                    }
                    // Commit the blocks written during this iteration.
                    dst.Commit();

                    // Updating dimensions origin
                    // Advance the first dimension (in sorted order) that still has data beyond
                    // its current block; every dimension before it is reset to 0 (a carry).
                    // If no dimension can advance, all blocks have been visited.
                    bool isOver = true;
                    for (int i = 0; i < totalDims; i++)
                    {
                        Dimension dim = srcDims[i];
                        int origin = dimOrigin[dim.Name] + deltas[dim.Name];
                        if (origin < dim.Length)
                        {
                            dimOrigin[dim.Name] = origin;
                            isOver = false;
                            // Progress indicator
                            // (reported only when the last, i.e. longest, dimension advances)
                            if (i == totalDims - 1)
                            {
                                double perc = (double)origin / dim.Length * 100.0;
                                if (updater != null) updater(perc, "Copying data ...");
                            }
                            break;
                        }
                        dimOrigin[dim.Name] = 0;
                    }
                    if (isOver) break;
                } while (copyVars.Count > 0);

                if (updater != null) updater(100.0, "Done.");
            }
            finally
            {
                // Restore the caller's autocommit setting whether or not the copy succeeded.
                dst.IsAutocommitEnabled = isAutoCommit;
            }

            return dst;
        }