Пример #1
0
        /// <summary>
        ///  Build a TermExpression comparing <paramref name="left"/> to <paramref name="right"/> using <paramref name="op"/>.
        /// </summary>
        /// <remarks>
        ///  A constant on the left side is moved to the right side when the operator can be inverted,
        ///  and the right side is cast to the left side type when the two types differ.
        ///  Comparisons against a null constant only support the Equal and NotEqual operators.
        /// </remarks>
        /// <param name="source">Table handed to CastedColumn.Build and SetComparer.ConvertToEnumIndexComparer</param>
        /// <param name="left">Column (or constant) on the left side of the comparison</param>
        /// <param name="op">Operator to compare the two sides with</param>
        /// <param name="right">Column (or constant) on the right side of the comparison</param>
        /// <exception cref="ArgumentException">
        ///  Thrown when both sides are constants, when an unquoted literal is compared to a string,
        ///  when null is compared with an operator other than Equal or NotEqual,
        ///  or when no comparer exists for the left side type and operator.
        /// </exception>
        public TermExpression(IXTable source, IXColumn left, CompareOperator op, IXColumn right)
        {
            _evaluate = EvaluateNormal;

            // Save arguments for ToString()
            // NOTE(review): _cOp keeps the operator as passed, while _left/_right may be swapped below - confirm ToString() accounts for that.
            _left  = left;
            _cOp   = op;
            _right = right;

            // Disallow constant <op> constant [likely error not wrapping column name]
            if (_left is ConstantColumn && _right is ConstantColumn)
            {
                throw new ArgumentException($"({left} {op.ToQueryForm()} {right}) is comparing two constants. Wrap [ColumnNames] in braces.");
            }

            // If the left side is a constant and the operator can be swapped, move it to the right side.
            // Comparers can check if the right side is constant and run a faster loop when that's the case.
            if (_left.IsConstantColumn() && !(_right.IsConstantColumn()))
            {
                if (op.TryInvertCompareOperator(out op))
                {
                    _left  = right;
                    _right = left;
                }
            }

            // Disallow unquoted constants used as strings
            if (_right.IsConstantColumn() && _left.ColumnDetails.Type == typeof(String8) && _right.ColumnDetails.Type == typeof(String8))
            {
                ConstantColumn cRight = _right as ConstantColumn;
                if (cRight != null && !cRight.IsNull && cRight.WasUnwrappedLiteral)
                {
                    // Report the constant which was actually checked; after a swap the 'right' argument is the other operand.
                    throw new ArgumentException($"{cRight} is compared to a string, but is unquoted. Strings must be quoted.");
                }
            }

            // Convert the right side to the left side type if required
            // This means constants will always be casted to the other side type.
            if (_left.ColumnDetails.Type != _right.ColumnDetails.Type)
            {
                _right = CastedColumn.Build(source, _right, _left.ColumnDetails.Type, ValueKinds.Invalid);
            }

            // Get the left and right getters
            _leftGetter  = _left.CurrentGetter();
            _rightGetter = _right.CurrentGetter();

            // Null comparison is generic
            bool rightIsNull = _right.IsNullConstant();
            if (rightIsNull || _left.IsNullConstant())
            {
                if (!rightIsNull)
                {
                    // Null stayed on the left (the operands were not swapped above), so test the other side.
                    // The getter was captured before this point; keep it in step with the column it reads.
                    _left       = _right;
                    _leftGetter = _rightGetter;
                }

                if (op == CompareOperator.Equal)
                {
                    _comparer = WhereIsNull;
                }
                else if (op == CompareOperator.NotEqual)
                {
                    _comparer = WhereIsNotNull;
                }
                else
                {
                    throw new ArgumentException("Only equals and not equals operators are supported against null.");
                }
            }
            else
            {
                // Get a comparer which can compare the values.
                // Use the (possibly swapped) left side: the right side was cast to that type above.
                Type comparedType = _left.ColumnDetails.Type;

                _comparer = TypeProviderFactory.Get(comparedType).TryGetComparer(op);
                if (_comparer == null)
                {
                    throw new ArgumentException($"No comparer found for type {comparedType.Name}.");
                }
            }

            // Optimize Enum to Constant comparisons to use the underlying indices
            if (_left.IsEnumColumn() && _right.IsConstantColumn())
            {
                // Get an optimized comparer against the indices rather than values
                IXColumn replacedRight = _right;
                _comparer = SetComparer.ConvertToEnumIndexComparer(_left, _comparer, ref replacedRight, source);

                // Get the indices on the left side
                _leftGetter = _left.IndicesCurrentGetter();

                // Use the updated value for the right side
                _rightGetter = replacedRight.CurrentGetter();
            }

            // Allow String8 to constant Contains queries to compare on the raw byte[] and int[]
            if (op == CompareOperator.Contains && _right.IsConstantColumn() && _left.ColumnDetails.Type == typeof(String8) && !_left.IsEnumColumn())
            {
                Func <object> rawGetter = _left.ComponentGetter(ColumnComponent.String8Raw);

                // The raw path is only used when the column can provide the raw component
                if (rawGetter != null)
                {
                    // Read the constant once; the first value of the right side is the term to search for
                    String8         rightValue      = (String8)_right.ValuesGetter()().Array.GetValue(0);
                    String8Comparer string8Comparer = new String8Comparer();

                    _evaluate = (vector) =>
                    {
                        String8Raw raw = (String8Raw)rawGetter();
                        string8Comparer.WhereContains(raw, rightValue, vector);
                    };
                }
            }
        }
Пример #2
0
        /// <summary>
        ///  Populate the GroupBy Dictionary which backs Peek.
        /// </summary>
        /// <remarks>
        ///  Peek reports each common distinct value along with the approximate percentage of rows which have it.
        ///  When many rows match, a sample is sufficient, because any common value will appear in the sample.
        ///  The number of matches isn't known up front, though.
        ///  So a Dictionary is kept for all rows and for successively smaller samples, each about 1/8 of the previous one.
        ///  Once a smaller sample holds enough rows to be statistically valid, the larger sets stop being collected.
        ///  That way the query runs only once, the final sample is large enough, and no giant Dictionary is built.
        /// </remarks>
        /// <param name="cancellationToken">CancellationToken to request early stop</param>
        private void BuildDictionary(CancellationToken cancellationToken)
        {
            // A key column which is an EnumColumn has its own dedicated path
            if (_column.IsEnumColumn())
            {
                BuildSingleEnumColumnDictionary(cancellationToken);
                return;
            }

            // Source of randomness used to pick the sampled rows
            Random random = new Random();

            // One Dictionary, CountAggregator, and remap array slot per sampling level
            GroupByDictionary[] dictionaries = new GroupByDictionary[SampleCount];
            CountAggregator[]   counts       = new CountAggregator[SampleCount];
            int[][]             remapArrays  = new int[SampleCount][];
            for (int level = 0; level < SampleCount; level++)
            {
                counts[level]       = new CountAggregator();
                dictionaries[level] = new GroupByDictionary(new ColumnDetails[] { _column.ColumnDetails });
            }

            // Getter for the current batch of column values
            Func <XArray> getColumnValues = _column.CurrentGetter();

            // Single-element key array handed to the Dictionaries
            XArray[] keys = new XArray[1];

            // Largest sample still being collected; this is the one reported at the end
            int activeSample = 0;

            while (_source.Next(XTableExtensions.DefaultBatchSize, cancellationToken) != 0)
            {
                // Start each batch with every row
                keys[0] = getColumnValues();

                // Walk from the full row set down through each successive 1/8 sample
                for (int level = 0; level < SampleCount; level++)
                {
                    // Samples larger than the active one are no longer collected
                    if (level >= activeSample)
                    {
                        GroupByDictionary dictionary = dictionaries[level];
                        CountAggregator   aggregator = counts[level];

                        // Find (or add) the bucket for each row in the GroupBy Dictionary, then count the rows per bucket
                        XArray bucketForRows = dictionary.FindOrAdd(keys);
                        aggregator.Add(bucketForRows, dictionary.Count);

                        // When the next smaller sample has enough rows, drop the bigger active one
                        bool isNextSmallerSample = (level == activeSample + 1);
                        if (isNextSmallerSample && aggregator.TotalRowCount > RequiredSampleSize)
                        {
                            // If every row was unique, stop early and don't set outputs (zero rows)
                            if (ShouldStopEarly(dictionaries[activeSample], counts[activeSample]))
                            {
                                return;
                            }

                            dictionaries[activeSample] = null;
                            counts[activeSample]       = null;
                            activeSample++;
                        }
                    }

                    // Narrow the rows to ~1/8 for the next level (even for levels no longer collected)
                    if (level + 1 < SampleCount)
                    {
                        ArraySelector eighth = Sampler.Eighth(keys[0].Selector, random, ref remapArrays[level]);
                        keys[0] = keys[0].Reselect(eighth);
                    }
                }
            }

            // All rows seen: hand over the distinct values and counts from the sample which is still active
            PostSortAndFilter(
                dictionaries[activeSample].DistinctKeys()[0],
                counts[activeSample].Values,
                counts[activeSample].TotalRowCount,
                activeSample == 0);
        }