Beispiel #1
0
            /// <summary>
            ///  Walk every word in this block in sorted-index order and append the
            ///  values GetInSet reports for that word to the word's list in the
            ///  dictionary, creating the list when the word is not yet present.
            /// </summary>
            /// <param name="result">Dictionary (word text to values) to append to</param>
            public void ExportToDictionary(Dictionary<string, List<ushort>> result)
            {
                IList<ushort> order;
                int orderCount;
                _words.TryGetSortedIndexes(out order, out orderCount);

                int position = 0;
                while (position < _words.Count)
                {
                    ushort wordId = order[position];
                    string text = _words[wordId].ToString();

                    // Collect the values for this word into a fresh set
                    ShortSet valuesForWord = new ShortSet(ushort.MaxValue);
                    GetInSet(wordId, valuesForWord);

                    // Find (or create) the list this word accumulates into
                    List<ushort> target;
                    bool alreadyPresent = result.TryGetValue(text, out target);
                    if (!alreadyPresent)
                    {
                        target = new List<ushort>();
                        result[text] = target;
                    }

                    target.AddRange(valuesForWord.Values);
                    position++;
                }
            }
Beispiel #2
0
            /// <summary>
            ///  Add to the result set the matches for every word in this block that
            ///  begins with the given prefix. Prefixes shorter than
            ///  MinimumPrefixExpandLength are matched as exact words only.
            /// </summary>
            /// <param name="prefix">Prefix for which to add matches</param>
            /// <param name="result">Set which receives the matches</param>
            public void WhereMatches(ByteBlock prefix, ShortSet result)
            {
                if (prefix.Length >= MinimumPrefixExpandLength)
                {
                    // Long enough to expand: locate the run of words starting with the prefix
                    IComparable<ByteBlock> startsWithPrefix = prefix.GetExtendedIComparable(ByteBlock.Comparison.IsPrefixOf);

                    int first = _words.FindFirstWhere(startsWithPrefix);
                    if (first >= 0)
                    {
                        int last = _words.FindLastWhere(startsWithPrefix);

                        IList<ushort> order;
                        int orderCount;
                        _words.TryGetSortedIndexes(out order, out orderCount);

                        // Union the set of each word in the [first, last] run into the result
                        int position = first;
                        while (position <= last)
                        {
                            GetInSet(order[position], result);
                            position++;
                        }
                    }
                }
                else
                {
                    // Too short to expand: equality only
                    WhereMatchesExact(prefix, result);
                }
            }
Beispiel #3
0
        /// <summary>
        ///  And this set with an enumerable of values (this set becomes the result).
        /// </summary>
        /// <remarks>
        ///  'And' cannot be easily computed in place for a set and an enumerable,
        ///  because it's difficult to tell quickly if values on both sides are set,
        ///  especially since 'other' can be in any order.
        ///
        ///  Instead, the values in 'other' (below this set's capacity) are collected
        ///  into a reusable scratch set, the scratch set is And-ed with this set, and
        ///  the two bit vectors are swapped so this set holds the result while the
        ///  scratch set keeps a separate array for the next call.
        /// </remarks>
        /// <param name="other">Items with which to And</param>
        /// <exception cref="ArgumentNullException">other is null</exception>
        public void And(IEnumerable<ushort> other)
        {
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            // Build a scratch set (once); reuse it, emptied, on later calls
            if (_scratchSet == null)
            {
                _scratchSet = new ShortSet(_capacity);
            }
            else
            {
                _scratchSet.Clear();
            }

            // Set values in 'other', ignoring anything at or above our capacity
            foreach (ushort value in other)
            {
                if (value < _capacity)
                {
                    _scratchSet.Add(value);
                }
            }

            // And with this; the scratch set now holds the result
            _scratchSet.And(this);

            // Swap bit vectors with the scratch set. The scratch set must receive our
            // OLD vector (not the one just assigned), otherwise both sets would share
            // a single array and the next Clear() of the scratch set would wipe this set.
            ulong[] thisVector = _bitVector;
            _bitVector = _scratchSet._bitVector;
            _scratchSet._bitVector = thisVector;
        }
Beispiel #4
0
        /// <summary>
        ///  Split the passed value into words and add to the result set the items
        ///  which contain, for every one of those words, some indexed word starting
        ///  with it. A value which is a single whole word is matched directly.
        /// </summary>
        /// <param name="prefix">Prefix to find</param>
        /// <param name="result">Result to which to add matches</param>
        public void WhereMatches(ByteBlock prefix, ShortSet result)
        {
            if (result == null)
            {
                throw new ArgumentNullException("result");
            }

            RangeSet parts = _splitter.Split(prefix);

            // Fast path: the whole value is exactly one word, so each block can
            // add its matches straight into the caller's set.
            bool isSingleWholeWord = parts.Count == 1 && parts.Ranges[0].Length == prefix.Length;
            if (isSingleWholeWord)
            {
                foreach (WordIndexBlock block in _blocks)
                {
                    block.WhereMatches(prefix, result);
                }

                return;
            }

            // General path: intersect the matches of each word, then union into result
            ShortSet intersection = null;
            ShortSet current = new ShortSet(result.Capacity);

            for (int p = 0; p < parts.Count; ++p)
            {
                Range part = parts.Ranges[p];
                ByteBlock partBlock = new ByteBlock(prefix.Array, part.Index, part.Length);

                // Gather matches for this word across every block
                current.Clear();
                foreach (WordIndexBlock block in _blocks)
                {
                    block.WhereMatches(partBlock, current);
                }

                if (intersection != null)
                {
                    // Later words narrow the running intersection
                    intersection.And(current);
                    continue;
                }

                // First word seeds the intersection
                intersection = new ShortSet(result.Capacity);
                intersection.Or(current);
            }

            // Nothing is added when the value split into no words at all
            if (intersection != null)
            {
                result.Or(intersection);
            }
        }
Beispiel #5
0
        /// <summary>
        ///  Base implementation: matches nothing and records on the execution
        ///  details that this column does not support the requested operator.
        /// </summary>
        /// <param name="op">Operator requested (reported in the error)</param>
        /// <param name="value">Value to compare against (unused here)</param>
        /// <param name="result">Set for matches (left untouched here)</param>
        /// <param name="details">Execution details which receive the error</param>
        public void TryWhere(Operator op, ByteBlock value, ShortSet result, ExecutionDetails details)
        {
            if (details == null)
            {
                throw new ArgumentNullException("details");
            }

            // The base column has no efficient bulk matching for any operator, so report that.
            details.AddError(ExecutionDetails.ColumnDoesNotSupportOperator, op, this.Name);
        }
Beispiel #6
0
            /// <summary>
            ///  Add to the given set all items listed for the word exactly equal to
            ///  the value passed; nothing is added when the lookup yields the
            ///  ushort.MaxValue sentinel.
            /// </summary>
            /// <param name="value">Word for which to add matches</param>
            /// <param name="result">Set to which matching items are added</param>
            public void WhereMatchesExact(ByteBlock value, ShortSet result)
            {
                ushort wordId;
                _words.TryGetIndexOf(value, out wordId);

                // ushort.MaxValue is treated as "word not present"
                if (wordId == ushort.MaxValue)
                {
                    return;
                }

                GetInSet(wordId, result);
            }
Beispiel #7
0
        /// <summary>
        ///  Add to the result set the items containing exactly the word passed.
        ///  Blocks are searched in order and only the first block which knows
        ///  the word contributes matches.
        /// </summary>
        /// <param name="word">Word to find</param>
        /// <param name="result">Result to which to add matches</param>
        public void WhereMatchExact(ByteBlock word, ShortSet result)
        {
            foreach (WordIndexBlock candidate in _blocks)
            {
                ushort foundAt = candidate.IndexOf(word);

                // ushort.MaxValue means this block does not contain the word
                if (foundAt == ushort.MaxValue)
                {
                    continue;
                }

                // Found: add this block's matches and stop looking
                candidate.GetInSet(foundAt, result);
                return;
            }
        }
Beispiel #8
0
        /// <summary>
        ///  Remove from this set every value which is set in the other set
        ///  (this set becomes the result), up to our capacity.
        /// </summary>
        /// <param name="other">ShortSet with which to AndNot</param>
        public void AndNot(ShortSet other)
        {
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            // Only the part both vectors share needs work: nothing can become set
            // above our capacity (those bits are already 0 here), and nothing above
            // their capacity needs clearing (those bits are already 0 there).
            ulong[] theirs = other._bitVector;
            int shared = Math.Min(_bitVector.Length, theirs.Length);

            int i = 0;
            while (i < shared)
            {
                _bitVector[i] &= ~theirs[i];
                i++;
            }
        }
Beispiel #9
0
        /// <summary>
        ///  Copy the values in other to this set, overwriting current set values,
        ///  up to our capacity.
        /// </summary>
        /// <param name="other">ShortSet from which to copy values</param>
        /// <exception cref="ArgumentNullException">other is null</exception>
        public void From(ShortSet other)
        {
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            // Copy the part of the vectors both sets share
            int length = Math.Min(_bitVector.Length, other._bitVector.Length);

            for (int i = 0; i < length; ++i)
            {
                _bitVector[i] = other._bitVector[i];
            }

            // Clear our values above other capacity, if any
            ClearAboveLength(length);

            // Whole ulongs were copied, so a larger 'other' may have set values above
            // our capacity in the last ulong. Clear back to our capacity, as Or does.
            TrimToCapacity();
        }
Beispiel #10
0
        /// <summary>
        ///  OrNot this set with another set (this set becomes the result), up
        ///  to our capacity: every value NOT in other is added to this set.
        /// </summary>
        /// <param name="other">ShortSet with which to OrNot</param>
        /// <exception cref="ArgumentNullException">other is null</exception>
        public void OrNot(ShortSet other)
        {
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            // Or in the complement of other for the part of the vectors both sets share.
            int length = Math.Min(_bitVector.Length, other._bitVector.Length);

            for (int i = 0; i < length; ++i)
            {
                _bitVector[i] = _bitVector[i] | ~other._bitVector[i];
            }

            // Values beyond other's vector are not in other, so they are all part of
            // its complement and must be set too. (Without this, values above other's
            // capacity were only added when they happened to share its last ulong.)
            for (int i = length; i < _bitVector.Length; ++i)
            {
                _bitVector[i] = ulong.MaxValue;
            }

            // Clear back to our capacity.
            TrimToCapacity();
        }
Beispiel #11
0
        /// <summary>
        ///  Add every value set in the other set to this set (this set becomes
        ///  the result), up to our capacity.
        /// </summary>
        /// <param name="other">ShortSet with which to Or</param>
        public void Or(ShortSet other)
        {
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            // Merge the ulongs both vectors share
            ulong[] theirs = other._bitVector;
            int shared = Math.Min(_bitVector.Length, theirs.Length);

            int i = 0;
            while (i < shared)
            {
                _bitVector[i] = _bitVector[i] | theirs[i];
                i++;
            }

            // The merge may have set values above our capacity in the last ulong; drop them.
            TrimToCapacity();
        }
Beispiel #12
0
        /// <summary>
        ///  Keep in this set only the values also set in the other set (this
        ///  set becomes the result), up to our capacity.
        /// </summary>
        /// <param name="other">ShortSet with which to And</param>
        public void And(ShortSet other)
        {
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            // Intersect the ulongs both vectors share
            ulong[] theirs = other._bitVector;
            int shared = Math.Min(_bitVector.Length, theirs.Length);

            int i = 0;
            while (i < shared)
            {
                _bitVector[i] = _bitVector[i] & theirs[i];
                i++;
            }

            // Anything of ours beyond the shared part has no counterpart in other, so clear it
            ClearAboveLength(shared);
        }
Beispiel #13
0
            /// <summary>
            ///  Or the IDs stored for the given set/word into a result ShortSet.
            ///  Sets shorter than DenseSetLengthCutoff bytes are read as a list of
            ///  ushort values; longer ones are read as ulong bit vectors.
            /// </summary>
            /// <param name="setId">ID of set/word to add</param>
            /// <param name="result">ShortSet to which to add results</param>
            public unsafe void GetInSet(ushort setId, ShortSet result)
            {
                ByteBlock stored = _sets[setId];
                bool isDense = stored.Length >= DenseSetLengthCutoff;

                fixed (byte* basePointer = stored.Array)
                {
                    // The set's bytes begin at its index within the backing array
                    byte* start = basePointer + stored.Index;

                    if (isDense)
                    {
                        // Dense Set: Or the bytes in as ulong bits (8 bytes per ulong)
                        ulong* bits = (ulong*)start;
                        result.Or(bits, (ushort)(stored.Length / 8));
                    }
                    else
                    {
                        // Sparse Set: Or in individual ushort values (2 bytes each),
                        // limited to the used portion as reported by FindUsedLength
                        ushort* ids = (ushort*)start;
                        ushort usedCount = FindUsedLength(ids, (ushort)(stored.Length / 2));
                        result.Or(ids, usedCount);
                    }
                }
            }
Beispiel #14
0
        /// <summary>
        ///  Set this set equal to (left AND right), overwriting current values,
        ///  up to our capacity.
        /// </summary>
        /// <param name="left">First ShortSet to And</param>
        /// <param name="right">Second ShortSet to And</param>
        /// <exception cref="ArgumentNullException">left or right is null</exception>
        public unsafe void FromAnd(ShortSet left, ShortSet right)
        {
            if (left == null)
            {
                throw new ArgumentNullException("left");
            }
            if (right == null)
            {
                throw new ArgumentNullException("right");
            }

            // Only the ulongs present in all three vectors can be combined
            int length = Math.Min(_bitVector.Length, Math.Min(left._bitVector.Length, right._bitVector.Length));

            // Skip the native path when there is nothing to combine: taking the
            // address of element 0 of an empty vector would throw.
            if (UseNativeSupport && length > 0)
            {
                fixed (ulong* thisA = &_bitVector[0])
                fixed (ulong* leftA = &left._bitVector[0])
                fixed (ulong* rightA = &right._bitVector[0])
                {
                    NativeMethods.AndSets(thisA, leftA, rightA, length);
                }
            }
            else
            {
                // Copy from (left & right)
                for (int i = 0; i < length; ++i)
                {
                    _bitVector[i] = left._bitVector[i] & right._bitVector[i];
                }
            }

            // Clear our values above other capacity, if any
            ClearAboveLength(length);

            // Whole ulongs were overwritten, so larger inputs may have set values above
            // our capacity in the last ulong. Clear back to our capacity, as Or does.
            TrimToCapacity();
        }
Beispiel #15
0
            /// <summary>
            ///  Check this block for internal consistency and record any problems
            ///  on the execution details: too many words, a words/sets count
            ///  mismatch and, at Full level, item IDs outside the column's range.
            ///  The words and sets columns are then asked to verify themselves.
            /// </summary>
            /// <param name="column">Column this block belongs to (name and item count are used)</param>
            /// <param name="level">How thorough the verification should be</param>
            /// <param name="details">Execution details which receive any errors</param>
            public void VerifyConsistency(IColumn column, VerificationLevel level, ExecutionDetails details)
            {
                if (_words.Count > WordCountLimit)
                {
                    details.AddError(ExecutionDetails.WordIndexBlockTooFull, column.Name, _words.Count);
                }

                if (_words.Count != _sets.Count)
                {
                    details.AddError(ExecutionDetails.WordIndexBlockSizesMismatch, column.Name, _words.Count, _sets.Count);
                }

                if (level == VerificationLevel.Full)
                {
                    // Validate that all IDs in all sets are valid
                    // NOTE: Replacing with a validating GetInSet would be more thorough; check for duplicate values, padding problems, etc.
                    ShortSet allValidItems = new ShortSet(column.Count);
                    allValidItems.Not();

                    // Only check indexes which exist in BOTH columns. A size mismatch has
                    // already been reported above; walking all words regardless would
                    // index past the end of the sets column and abort verification.
                    int checkableCount = Math.Min(_words.Count, _sets.Count);

                    ShortSet items = new ShortSet(ushort.MaxValue);
                    for (int i = 0; i < checkableCount; ++i)
                    {
                        ushort wordIndex = (ushort)i;

                        items.Clear();
                        GetInSet(wordIndex, items);

                        // Whatever remains after removing the valid IDs is invalid
                        items.AndNot(allValidItems);
                        if (items.Count() > 0)
                        {
                            details.AddError(ExecutionDetails.WordIndexInvalidItemID, column.Name, _words[wordIndex], String.Join(", ", items.Values));
                        }
                    }
                }

                // Ask the Sets and Words columns to self-verify
                _sets.VerifyConsistency(level, details);
                _words.VerifyConsistency(level, details);
            }