Пример #1
0
        /// <summary>
        ///  Add or Update the given items with the given values within this partition.
        ///  Each row is resolved to a LID through the ID column, every column is then resized to the
        ///  current item count, the provided values are written, and all committable columns are committed.
        /// </summary>
        /// <param name="values">Set of Columns and values to add or update</param>
        /// <param name="options">Options to adjust behavior of AddOrUpdate; this method reads <c>options.Mode</c></param>
        public void AddOrUpdate(DataBlock.ReadOnlyDataBlock values, AddOrUpdateOptions options)
        {
            int columnCount   = values.ColumnCount;
            int idColumnIndex = values.IndexOfColumn(this.IDColumn.Name);

            // Look up the LID for each item or add it
            ushort[] itemLIDs = FindOrAssignLIDs(values, idColumnIndex, options.Mode);

            // If there are new items, resize every column for them
            ushort newCount = (ushort)(_itemCount);

            foreach (IColumn<object> column in this.Columns.Values)
            {
                if (column.Count != newCount)
                {
                    column.SetSize(newCount);
                }
            }

            // Set values for each other provided column [FillPartitionColumn skips the ID column itself]
            for (int columnIndex = 0; columnIndex < columnCount; ++columnIndex)
            {
                FillPartitionColumn(values, columnIndex, itemLIDs);
            }

            // Commit every column [ones with new values and ones resized with defaults]
            foreach (IColumn<object> column in this.Columns.Values)
            {
                // Single type test: 'as' yields null for columns which are not committable
                ICommittable committable = column as ICommittable;
                if (committable != null)
                {
                    committable.Commit();
                }
            }
        }
Пример #2
0
        public void Partition_AddOrUpdate_IncludeOnlyFromArray()
        {
            // Arrange: a partition covering all hashes, with the standard test columns
            Partition partition = new Partition(PartitionMask.All);
            AddColumns(partition);

            // Build the full sample block, but project only a slice of it:
            // three rows of the row-order array, beginning at the second start index
            DataBlock sampleData = BuildSampleData();
            int[] rowOrder       = { 0, 1, 2, 3, 4 };
            int[] startIndexes   = { 0, 2 };

            DataBlock.ReadOnlyDataBlock readOnlySample = sampleData;
            DataBlock.ReadOnlyDataBlock projectedRows  = readOnlySample.ProjectChain(rowOrder, startIndexes[1], 3);

            // Act: add only the projected rows
            partition.AddOrUpdate(projectedRows, new AddOrUpdateOptions());

            // Assert: selecting every item, ordered by ID ascending, returns exactly the projected rows
            SelectQuery allItemsById = new SelectQuery
            {
                Columns           = new string[] { "ID" },
                Where             = new AllExpression(),
                OrderByColumn     = "ID",
                OrderByDescending = false
            };

            SelectResult selected = partition.Query(allItemsById);

            Assert.AreEqual(3, (int)selected.Total);
            Assert.AreEqual("11943", selected.Values[0, 0].ToString());
            Assert.AreEqual("11999", selected.Values[1, 0].ToString());
            Assert.AreEqual("12505", selected.Values[2, 0].ToString());
        }
Пример #3
0
            /// <summary>
            /// Computes the target partition for each item in the ReadOnlyDataBlock by hashing its ID value,
            /// and computes the offset at which each partition's items begin in a partition-grouped ordering.
            /// </summary>
            /// <param name="table">Table where values will be added</param>
            /// <param name="values">DataBlock containing values to be added to the table</param>
            /// <param name="idColumnIndex">Index of the id column</param>
            /// <param name="partitionIds">[Out] array of the partition ids for each element</param>
            /// <param name="partitionInfo">[Out] one entry per table partition. StartIndex is the running offset of the
            /// partition's first item, or -1 when no item hashed to that partition. Count is returned as 0 (it is cleared
            /// after the offsets are computed so that it can be reused as a per-partition index).</param>
            public void ComputePartition(Table table, DataBlock.ReadOnlyDataBlock values, int idColumnIndex, out int[] partitionIds, out TargetPartitionInfo[] partitionInfo)
            {
                int rowCount = values.RowCount;

                // TODO: [danny chen] it would be nice if I could get rid of this tunneling of GetColumn
                // from the ReadOnlyDataBlock (and avoid the special casing for non-projected blocks)
                // but I can't see a way to allow strongly types random access without a bunch of work
                // incurred on each access (fetch, cast the array).
                // NOTE(review): the array is indexed directly by row number below; this assumes row i of the
                // block is element i of the array returned by GetColumn - confirm for projected blocks.
                T[] idColumn = (T[])values.GetColumn(idColumnIndex);

                // Results are built in locals because 'out' parameters cannot be captured by the delegate below
                int[] localPartitionIds = new int[rowCount];
                TargetPartitionInfo[] localPartitionInfo = new TargetPartitionInfo[table.PartitionCount];

                var rangePartitioner = Partitioner.Create(0, rowCount);

                Parallel.ForEach(rangePartitioner,
                                 delegate(Tuple <int, int> range, ParallelLoopState unused)
                {
                    // Scratch objects are created per range, so no hashing state is shared between ranges
                    ValueTypeReference <T> vtr = new ValueTypeReference <T>();
                    Value v = Value.Create(null);
                    for (int i = range.Item1; i < range.Item2; ++i)
                    {
                        // Hash the ID for each item and compute the partition that the item belongs to
                        vtr.Value = idColumn[i];
                        v.Assign(vtr);
                        int idHash      = v.GetHashCode();
                        int partitionId = PartitionMask.IndexOfHash(idHash, table._partitionBits);

                        // Each row index is written by exactly one range; the per-partition counters are
                        // shared across ranges, so they are incremented atomically
                        localPartitionIds[i] = partitionId;
                        Interlocked.Increment(ref localPartitionInfo[partitionId].Count);
                    }
                });

                // Turn the per-partition counts into start offsets (a running sum over non-empty partitions)
                int nextStartIndex = 0;

                for (int i = 0; i < table.PartitionCount; ++i)
                {
                    if (localPartitionInfo[i].Count == 0)
                    {
                        // No items for this partition
                        localPartitionInfo[i].StartIndex = -1;
                    }
                    else
                    {
                        localPartitionInfo[i].StartIndex = nextStartIndex;
                        nextStartIndex += localPartitionInfo[i].Count;

                        // NOTE: Count field is cleared here because it is
                        //   reused to track per-partition indexes when
                        //   building up the sort key data
                        localPartitionInfo[i].Count = 0;
                    }
                }

                partitionIds  = localPartitionIds;
                partitionInfo = localPartitionInfo;
            }
Пример #4
0
        /// <summary>
        ///  Resolves a LID for each row of the block by delegating to an AddOrUpdateWorker
        ///  constructed for the data type of the block's ID column.
        /// </summary>
        /// <param name="values">Block containing the items to look up or add</param>
        /// <param name="idColumnIndex">Index of the ID column within the block</param>
        /// <param name="mode">AddOrUpdate mode passed through to the worker</param>
        /// <returns>The array of LIDs produced by the worker</returns>
        private ushort[] FindOrAssignLIDs(DataBlock.ReadOnlyDataBlock values, int idColumnIndex, AddOrUpdateMode mode)
        {
            // The worker is closed over the block's ID column type so it can read the IDs as that native type;
            // the worker decides whether the partition's column can be written directly or through the untyped path
            ITypedAddOrUpdateWorker typedWorker = NativeContainer.CreateTypedInstance<ITypedAddOrUpdateWorker>(
                typeof(AddOrUpdateWorker<>),
                values.GetTypeForColumn(idColumnIndex));

            ushort[] resolvedLIDs = typedWorker.FindOrAssignLIDs(this, values, idColumnIndex, mode);
            return resolvedLIDs;
        }
Пример #5
0
        /// <summary>
        ///  Adds a column to the table for every column in the block which the first partition does not
        ///  already contain. The type of each new column is inferred from the block's values (string when
        ///  no type can be inferred). If the table has no ID column yet, the first new column whose name
        ///  ends with "ID" becomes the primary key, or else the first new column.
        /// </summary>
        /// <param name="values">Block whose columns and values are inspected</param>
        public void AddColumnsFromBlock(DataBlock.ReadOnlyDataBlock values)
        {
            bool foundIdColumn = (_partitions[0].IDColumn != null);
            List<ColumnDetails> discoveredNewColumns = new List<ColumnDetails>();

            for (int columnIndex = 0; columnIndex < values.ColumnCount; ++columnIndex)
            {
                // Get the column name from the block
                string columnName = values.Columns[columnIndex].Name;

                // Add or alter columns only which weren't manually added
                if (_partitions[0].ContainsColumn(columnName))
                {
                    continue;
                }

                // Make the ID column the first one to end with 'ID' or the first column.
                // Ordinal comparison: column names are identifiers, so the match must not vary with the current culture.
                bool isIdColumn = (foundIdColumn == false && columnName.EndsWith("ID", StringComparison.Ordinal));

                // Walk all values in this block to infer the column type
                Type bestColumnType = null;
                for (int rowIndex = 0; rowIndex < values.RowCount; ++rowIndex)
                {
                    bestColumnType = Value.Create(values[rowIndex, columnIndex]).BestType(bestColumnType);
                }

                // If no values were set, default to string [can't tell actual best type]
                if (bestColumnType == null)
                {
                    bestColumnType = typeof(String);
                }

                discoveredNewColumns.Add(new ColumnDetails(columnName, bestColumnType.Name, null)
                {
                    IsPrimaryKey = isIdColumn
                });
                foundIdColumn |= isIdColumn;
            }

            // If no column name ended with 'ID', the first one is the ID column
            if (!foundIdColumn && discoveredNewColumns.Count > 0)
            {
                discoveredNewColumns[0].IsPrimaryKey = true;
            }

            // Add the discovered columns. If any names match existing columns they'll be merged properly in Partition.AddColumn.
            AddColumns(discoveredNewColumns);
        }
Пример #6
0
        /// <summary>
        ///  Writes the block's values for one column into this partition by delegating to an
        ///  AddOrUpdateWorker constructed for the block column's data type. The ID column is skipped.
        /// </summary>
        /// <param name="values">Block containing the values to write</param>
        /// <param name="columnIndex">Index of the block column to write</param>
        /// <param name="itemLIDs">LID for each row of the block</param>
        private void FillPartitionColumn(DataBlock.ReadOnlyDataBlock values, int columnIndex, ushort[] itemLIDs)
        {
            string blockColumnName = values.Columns[columnIndex].Name;
            bool   isIdColumn      = blockColumnName.Equals(this.IDColumn.Name, StringComparison.OrdinalIgnoreCase);

            if (!isIdColumn)
            {
                // The worker is closed over the block column's type so values can be read as that native type;
                // the worker decides whether the partition column takes them directly or through the untyped path
                ITypedAddOrUpdateWorker typedWorker = NativeContainer.CreateTypedInstance<ITypedAddOrUpdateWorker>(
                    typeof(AddOrUpdateWorker<>),
                    values.GetTypeForColumn(columnIndex));

                typedWorker.FillPartitionColumn(this, values, columnIndex, itemLIDs);
            }
        }
Пример #7
0
            /// <summary>
            ///  Copies one block column into the matching partition column, row by row, using the LID
            ///  of each row as the destination index. The ID column and rows without a LID are skipped.
            /// </summary>
            /// <param name="p">Partition whose column is written</param>
            /// <param name="values">Block containing the values to write</param>
            /// <param name="columnIndex">Index of the block column to copy</param>
            /// <param name="itemLIDs">LID for each row of the block; ushort.MaxValue marks a row with no LID</param>
            /// <exception cref="ArribaWriteException">Wraps any exception thrown while a value is being set</exception>
            public void FillPartitionColumn(Partition p, DataBlock.ReadOnlyDataBlock values, int columnIndex, ushort[] itemLIDs)
            {
                string targetName = values.Columns[columnIndex].Name;

                // ID column values are not written by this method
                if (targetName.Equals(p.IDColumn.Name, StringComparison.OrdinalIgnoreCase))
                {
                    return;
                }

                IUntypedColumn destination = p.Columns[targetName];

                // Write through the strongly typed inner column only when T is exactly the column's type
                IColumn<T> typedDestination = (typeof(T) == destination.ColumnType)
                    ? (IColumn<T>)destination.InnerColumn
                    : null;

                for (int row = 0; row < values.RowCount; row++)
                {
                    T      cellValue = values.GetValueT<T>(row, columnIndex);
                    ushort lid       = itemLIDs[row];

                    // Only rows which were assigned a LID have somewhere to be written
                    if (lid != ushort.MaxValue)
                    {
                        try
                        {
                            if (typedDestination == null)
                            {
                                destination[lid] = cellValue;
                            }
                            else
                            {
                                typedDestination[lid] = cellValue;
                            }
                        }
                        catch (Exception ex)
                        {
                            // Report the failing item by the value in the block's first column
                            throw new ArribaWriteException(values[row, 0], targetName, cellValue, ex);
                        }
                    }
                }
            }
Пример #8
0
            /// <summary>
            ///  Finds the LID of each item in the block within the partition's ID column and, unless the mode is
            ///  UpdateAndIgnoreAdds, assigns a new LID to each ID which is not yet known. Every ID is also verified
            ///  to hash into this partition's mask.
            /// </summary>
            /// <param name="p">Partition whose ID column is searched and extended (its _itemCount is incremented for each add)</param>
            /// <param name="values">Block containing the items to look up or add</param>
            /// <param name="idColumnIndex">Index of the ID column within the block</param>
            /// <param name="mode">Controls whether unknown IDs are added, ignored (UpdateAndIgnoreAdds) or rejected (UpdateOnly)</param>
            /// <returns>One LID per block row. In UpdateAndIgnoreAdds mode, rows whose lookup left ushort.MaxValue keep that value.</returns>
            /// <exception cref="ArribaException">An ID's hash does not match the partition's mask</exception>
            /// <exception cref="ArribaWriteException">A new ID was found in UpdateOnly mode, or the item count has reached ushort.MaxValue</exception>
            public ushort[] FindOrAssignLIDs(Partition p, DataBlock.ReadOnlyDataBlock values, int idColumnIndex, AddOrUpdateMode mode)
            {
                // TODO: consider keeping one instance of the worker long term? if so, this becomes a private class field
                ValueTypeReference <T> vtr = new ValueTypeReference <T>();
                Value v = Value.Create(null);

                ushort[] itemLIDs = new ushort[values.RowCount];
                int      addCount = 0;

                IUntypedColumn idColumn      = p.Columns[p.IDColumn.Name];
                IColumn <T>    typedIdColumn = null;

                // Use the strongly typed inner column only when T is exactly the ID column's type
                if (typeof(T) == idColumn.ColumnType)
                {
                    typedIdColumn = (IColumn <T>)idColumn.InnerColumn;
                }

                // First pass: look up existing LIDs, count the candidates for adding, and verify routing
                for (int index = 0; index < values.RowCount; ++index)
                {
                    // Look for the LID of each item in the ID column
                    // NOTE(review): presumably TryGetIndexOf leaves ushort.MaxValue in the out value when the ID is not found - confirm
                    T externalID = values.GetValueT <T>(index, idColumnIndex);
                    if (typedIdColumn != null)
                    {
                        typedIdColumn.TryGetIndexOf(externalID, out itemLIDs[index]);
                    }
                    else
                    {
                        idColumn.TryGetIndexOf(externalID, out itemLIDs[index]);
                    }

                    // Counted per row, so an ID repeated within the block is counted more than once;
                    // the count is only used below as the initial dictionary capacity
                    if (itemLIDs[index] == ushort.MaxValue)
                    {
                        addCount++;
                    }

                    // Verify this item was routed to the right partition
                    vtr.Value = externalID;
                    v.Assign(vtr);
                    int idHash = v.GetHashCode();
                    if (!p.Mask.Matches(idHash))
                    {
                        throw new ArribaException(StringExtensions.Format("Item with ID '{0}', hash '{1:x}' incorrectly routed to Partition {2}.", externalID, idHash, p.Mask));
                    }
                }

                // Go back and add the items which need to be added in a batch
                if (mode != AddOrUpdateMode.UpdateAndIgnoreAdds)
                {
                    // Created lazily: stays null when the block contains no adds
                    Dictionary <T, ushort> newlyAssignedLIDs = null;

                    for (int index = 0; index < values.RowCount; ++index)
                    {
                        T      idValue = values.GetValueT <T>(index, idColumnIndex);
                        ushort lid     = itemLIDs[index];

                        // If this is an add...
                        if (lid == ushort.MaxValue)
                        {
                            // If we have adds, we'll need to track new IDs
                            if (newlyAssignedLIDs == null)
                            {
                                newlyAssignedLIDs = new Dictionary <T, ushort>(addCount);
                            }

                            T externalID = idValue;

                            // If this ID was already added in this batch, this time it's an update
                            // [TryGetValue then leaves the previously assigned LID in 'lid']
                            if (newlyAssignedLIDs.TryGetValue(externalID, out lid) == false)
                            {
                                // If in "UpdateOnly" mode, throw
                                if (mode == AddOrUpdateMode.UpdateOnly)
                                {
                                    throw new ArribaWriteException(externalID, p.IDColumn.Name, externalID,
                                                                   new ArribaException("AddOrUpdate was in UpdateOnly mode but contained a new ID, which is an error."));
                                }

                                // If this was a new item and not added in this batch, assign it a LID
                                lid = p._itemCount;

                                // ushort.MaxValue is used above to mean "no LID", so it cannot be handed out as a LID
                                if (lid == ushort.MaxValue)
                                {
                                    throw new ArribaWriteException(externalID, p.IDColumn.Name, externalID,
                                                                   new ArribaException("Column full in Partition. Unable to add items."));
                                }

                                // Grow the ID column by one item and store the new ID at its LID
                                p._itemCount++;
                                idColumn.SetSize((ushort)(p._itemCount));

                                if (typedIdColumn != null)
                                {
                                    typedIdColumn[lid] = externalID;
                                }
                                else
                                {
                                    idColumn[lid] = externalID;
                                }

                                newlyAssignedLIDs[externalID] = lid;
                            }
                        }

                        itemLIDs[index] = lid;
                    }

                    // Commit the updates to the values column if the column requires it (FastAddSortedColumn does)
                    if (idColumn is ICommittable)
                    {
                        (idColumn as ICommittable).Commit();
                    }
                }

                return(itemLIDs);
            }
Пример #9
0
        /// <summary>
        ///  Add or Update the given items with the given values. The ID column must be passed.
        ///  If an ID is not known, the item will be added.
        ///  For each item, the value for each column is set to the provided values.
        ///  The whole operation runs under the table's write lock.
        /// </summary>
        /// <remarks>
        ///  NOTE(review): earlier documentation stated that the ID column must be the first column; this method
        ///  locates it by name with IndexOfColumn - confirm whether anything downstream still relies on it being first.
        /// </remarks>
        /// <param name="values">Set of Columns and values to add or update</param>
        /// <param name="options">Options to adjust behavior of AddOrUpdate</param>
        /// <exception cref="ArribaException">The table has no ID column, the block does not contain the ID column,
        /// or the block contains a column which is not in the table</exception>
        public void AddOrUpdate(DataBlock.ReadOnlyDataBlock values, AddOrUpdateOptions options)
        {
            _locker.EnterWriteLock();
            try
            {
                // Add columns from data, if this is the first data and columns weren't predefined
                if (options.AddMissingColumns)
                {
                    AddColumnsFromBlock(values);
                }

                // The first partition is used as the reference for the table's column definitions
                ColumnDetails idColumn = _partitions[0].IDColumn;
                if (idColumn == null)
                {
                    throw new ArribaException("Items cannot be added to this Table because it does not yet have an ID column defined. Call AddColumn with exactly one column with 'IsPrimaryKey' true and then items may be added.");
                }
                int idColumnIndex = values.IndexOfColumn(idColumn.Name);
                if (idColumnIndex == -1)
                {
                    throw new ArribaException(StringExtensions.Format("AddOrUpdates must be passed the ID column, '{0}', in order to tell which items to update.", idColumn.Name));
                }

                // Verify all passed columns exist
                foreach (ColumnDetails column in values.Columns)
                {
                    // Required by TryGetValue; only the existence of the column matters here
                    ColumnDetails foundColumn;
                    if (!_partitions[0].DetailsByColumn.TryGetValue(column.Name, out foundColumn))
                    {
                        throw new ArribaException(StringExtensions.Format("AddOrUpdate failed because values were passed for column '{0}', which is not in the table. Use AddColumn to add all columns first or ensure the first block added to the Table has all desired columns.", column.Name));
                    }
                }

                // Non-Parallel Implementation
                // [with a single partition every item goes to it, so no routing is needed]
                if (_partitions.Count == 1)
                {
                    _partitions[0].AddOrUpdate(values, options);
                    return;
                }

                // Determine the partition each item should go to
                int[] partitionIds;
                TargetPartitionInfo[] partitionInfo;
                Type idColumnArrayType = values.GetTypeForColumn(idColumnIndex);

                // Reuse the cached splitter when it was created for the same ID column type; otherwise create and cache one
                if (_splitter == null || _splitter.Item2 == null || _splitter.Item1 != idColumnArrayType)
                {
                    IComputePartition splitter = NativeContainer.CreateTypedInstance <IComputePartition>(typeof(ComputePartitionHelper <>), idColumnArrayType);
                    _splitter = Tuple.Create(idColumnArrayType, splitter);
                }
                _splitter.Item2.ComputePartition(this, values, idColumnIndex, out partitionIds, out partitionInfo);

                // Sort/group the incoming items by partition and then by index to ensure they
                // are processed in the order they were presented in the input ReadOnlyDataBlock
                int[] sortOrder = new int[values.RowCount];
                for (int i = 0; i < values.RowCount; ++i)
                {
                    // Count is used as the next free slot within the partition's range of sortOrder;
                    // after this loop it holds the number of items placed for the partition
                    int p          = partitionIds[i];
                    int startIndex = partitionInfo[p].StartIndex + partitionInfo[p].Count;
                    sortOrder[startIndex] = i;
                    partitionInfo[p].Count++;
                }

                // Work item: for each partition in the range, project that partition's slice of sortOrder and add it
                Action <Tuple <int, int>, ParallelLoopState> forBody =
                    delegate(Tuple <int, int> range, ParallelLoopState unused)
                {
                    for (int p = range.Item1; p < range.Item2; ++p)
                    {
                        int startIndex = partitionInfo[p].StartIndex;
                        int length     = partitionInfo[p].Count;
                        DataBlock.ReadOnlyDataBlock partitionValues = values.ProjectChain(sortOrder, startIndex, length);
                        _partitions[p].AddOrUpdate(partitionValues, options);
                    }
                };

                // In parallel, each partition will add items which belong to it
                if (this.RunParallel)
                {
                    var rangePartitioner = Partitioner.Create(0, _partitions.Count);
                    Parallel.ForEach(rangePartitioner, this.ParallelOptions, forBody);
                }
                else
                {
                    // Sequential fallback: run the same body once over every partition on the calling thread
                    var range = Tuple.Create(0, _partitions.Count);
                    forBody(range, null);
                }
            }
            finally
            {
                // Released on every exit path, including the single-partition early return and exceptions
                _locker.ExitWriteLock();
            }
        }
Пример #10
0
 /// <summary>
 ///  Add or Update the given items with the given values, using a default-constructed
 ///  AddOrUpdateOptions. Forwards to the overload which takes explicit options.
 /// </summary>
 /// <param name="values">Set of Columns and values to add or update</param>
 public void AddOrUpdate(DataBlock.ReadOnlyDataBlock values)
 {
     AddOrUpdateOptions defaultOptions = new AddOrUpdateOptions();
     this.AddOrUpdate(values, defaultOptions);
 }
Пример #11
0
        /// <summary>
        ///  Adds to the table the block columns which the first partition does not already contain.
        ///  A column is added only if it is the primary key or the block holds at least one non-default
        ///  value for it. When a column's type is not set, it is taken from the block's typed column
        ///  array or inferred from the block's values, falling back to string.
        /// </summary>
        /// <param name="values">Block whose columns and values are inspected</param>
        public void AddColumnsFromBlock(DataBlock.ReadOnlyDataBlock values)
        {
            List<ColumnDetails> columnsToAdd = new List<ColumnDetails>();

            // Find the ID column
            //  [The existing one, or one marked as primary key on the block, or one ending with 'ID', or the first column]
            //  Ordinal comparison: column names are identifiers, so the match must not vary with the current culture.
            ColumnDetails idColumn = _partitions[0].IDColumn
                                     ?? values.Columns.FirstOrDefault((cd) => cd.IsPrimaryKey)
                                     ?? values.Columns.FirstOrDefault((cd) => cd.Name.EndsWith("ID", StringComparison.Ordinal))
                                     ?? values.Columns.FirstOrDefault();

            // Mark the ID column [there is none to mark when the table has no ID column and the block has no columns]
            if (idColumn != null)
            {
                idColumn.IsPrimaryKey = true;
            }

            // Loop-invariant: the DateTime default, as block values compare against it
            DateTime defaultUtc = default(DateTime).ToUniversalTime();

            for (int columnIndex = 0; columnIndex < values.ColumnCount; ++columnIndex)
            {
                ColumnDetails details             = values.Columns[columnIndex];
                bool          hasNonDefaultValues = false;

                // If this column was already added, no need to scan these values
                if (_partitions[0].ContainsColumn(details.Name))
                {
                    continue;
                }

                // Figure out the column type. Did the DataBlock provide one?
                Type determinedType = ColumnFactory.GetTypeFromTypeString(details.Type);

                // If not, is the DataBlock column array typed?
                determinedType = determinedType ?? values.GetTypeForColumn(columnIndex);
                if (determinedType == typeof(object) || determinedType == typeof(Value))
                {
                    determinedType = null;
                }

                // Get the column default, if provided, or the default for the type, if provided
                object columnDefault = details.Default;
                if (columnDefault == null && determinedType != null)
                {
                    columnDefault = ColumnFactory.GetDefaultValueFromTypeString(determinedType.Name);
                }

                Type  inferredType = null;
                Value v            = Value.Create(null);

                for (int rowIndex = 0; rowIndex < values.RowCount; ++rowIndex)
                {
                    object value = values[rowIndex, columnIndex];

                    // Identify the best type for all block values, if no type was already determined
                    if (determinedType == null)
                    {
                        v.Assign(value);
                        Type newBestType = v.BestType(inferredType);

                        // If the type has changed, get an updated default value.
                        // [Compared with the type inferred so far; determinedType is always null in this branch,
                        //  so comparing against it refreshed the default on every row with a known type.]
                        if (newBestType != null && newBestType != inferredType)
                        {
                            columnDefault = ColumnFactory.GetDefaultValueFromTypeString(newBestType.Name);
                            inferredType  = newBestType;
                        }
                    }

                    // Track whether any non-default values were seen [could be raw types or Value wrapper]
                    if (hasNonDefaultValues == false && value != null && !value.Equals("") && !value.Equals(defaultUtc))
                    {
                        if (columnDefault == null || value.Equals(columnDefault) == false)
                        {
                            hasNonDefaultValues = true;
                        }
                    }
                }

                // Set the column type
                if (String.IsNullOrEmpty(details.Type) || details.Type.Equals(Arriba.Model.Column.ColumnDetails.UnknownType))
                {
                    details.Type = ColumnFactory.GetCanonicalTypeName(determinedType ?? inferredType ?? typeof(string));
                }

                // Add the column if it had any non-default values (and didn't already exist)
                if (hasNonDefaultValues || details.IsPrimaryKey)
                {
                    columnsToAdd.Add(details);
                }
            }

            // Add the discovered columns. If any names match existing columns they'll be merged properly in Partition.AddColumn.
            AddColumns(columnsToAdd);
        }