Beispiel #1
0
        /// <summary>Adds a clause to a boolean query.</summary>
        /// <throws>  TooManyClauses if the new number of clauses exceeds the maximum clause number </throws>
        /// <seealso cref="MaxClauseCount">
        /// </seealso>
        public virtual void  Add(BooleanClause clause)
        {
            if (clauses.Count >= _maxClauses)
            {
                throw new TooManyClauses();
            }

            clauses.Add(clause);
        }
Beispiel #2
0
        /// <summary>
        /// Adds a clause to a boolean query. </summary>
        /// <exception cref="TooManyClausesException"> If the new number of clauses exceeds the maximum clause number </exception>
        /// <seealso cref="MaxClauseCount"/>
        public virtual void Add(BooleanClause clause)
        {
            if (clauses.Count >= maxClauseCount)
            {
                throw new TooManyClausesException();
            }

            clauses.Add(clause);
        }
Beispiel #3
0
        /// <summary> Adds a term to the end of the query phrase.
        /// The relative position of the term within the phrase is specified explicitly.
        /// This allows e.g. phrases with more than one term at the same position
        /// or phrases with gaps (e.g. in connection with stopwords).
        ///
        /// </summary>
        /// <param name="term">
        /// </param>
        /// <param name="position">
        /// </param>
        public virtual void  Add(Term term, int position)
        {
            if (terms.Count == 0)
            {
                field = term.Field;
            }
            else if ((System.Object)term.Field != (System.Object)field)
            {
                throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
            }

            terms.Add(term);
            positions.Add(position);
            if (position > maxPosition)
            {
                maxPosition = position;
            }
        }
Beispiel #4
0
 /// <summary>
 /// Add a subquery to this disjunction </summary>
 /// <param name="query"> The disjunct added </param>
 public virtual void Add(Query query)
 {
     disjuncts.Add(query);
 }
        /// <summary>
        /// This method exists in order to avoid recursive calls to the method
        /// as the complexity of a fairly small matrix then easily would require
        /// a gigabyte sized stack per thread.
        /// </summary>
        /// <param name="reusableToken"></param>
        /// <returns>null if exhausted, instance request_next_token if one more call is required for an answer, 
        /// or instance parameter resuableToken.</returns>
        private Token ProduceNextToken(Token reusableToken)
        {
            if (_currentPermuationTokens != null)
            {
                _currentShingleLength++;

                if (_currentShingleLength + _currentPermutationTokensStartOffset <= _currentPermuationTokens.Count
                    && _currentShingleLength <= MaximumShingleSize)
                {
                    // it is possible to create at least one more shingle of the current matrix permutation

                    if (IsIgnoringSinglePrefixOrSuffixShingle && 
                        _currentShingleLength == 1 && 
                        (_currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsFirst || _currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsLast))
                    {
                        return Next();
                    }

                    var termLength = 0;

                    var shingle = new EquatableList<Token>();

                    for (int i = 0; i < _currentShingleLength; i++)
                    {
                        var shingleToken = _currentPermuationTokens[i + _currentPermutationTokensStartOffset];
                        termLength += shingleToken.TermLength();
                        shingle.Add(shingleToken);
                    }
                    if (SpacerCharacter != null)
                        termLength += _currentShingleLength - 1;

                    // only produce shingles that not already has been created
                    if (!_shinglesSeen.Add(shingle))
                        return _requestNextToken;

                    // shingle token factory
                    var sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future. ;)
                    foreach (var shingleToken in shingle)
                    {
                        if (SpacerCharacter != null &&  sb.Length > 0)
                            sb.Append(SpacerCharacter);

                        sb.Append(shingleToken.TermBuffer(), 0, shingleToken.TermLength());
                    }

                    reusableToken.SetTermBuffer(sb.ToString());
                    UpdateToken(reusableToken, shingle, _currentPermutationTokensStartOffset, _currentPermutationRows,
                                _currentPermuationTokens);

                    return reusableToken;
                }

                // it is NOT possible to create one more shingles of the current matrix permutation
                if (_currentPermutationTokensStartOffset < _currentPermuationTokens.Count - 1)
                {
                    // reset shingle size and move one step to the right in the current tokens permutation
                    _currentPermutationTokensStartOffset++;
                    _currentShingleLength = MinimumShingleSize - 1;
                    return _requestNextToken;
                }


                // todo does this ever occur?
                if (_permutations == null)
                    return null;

                if (!_permutations.HasNext())
                {
                    // load more data (if available) to the matrix

                    // don't really care, we just read it.
                    if (_input != null)
                        ReadColumn();

                    // get rid of resources

                    // delete the first column in the matrix
                    var deletedColumn = Matrix.Columns[0];
                    Matrix.Columns.RemoveAt(0);

                    // remove all shingles seen that include any of the tokens from the deleted column.
                    var deletedColumnTokens = deletedColumn.Rows.SelectMany(row => row.Tokens).ToList();
                    
                    // I'm a little concerned about this part of the code, because the unit tests currently 
                    // don't cover this scenario. (I put a break point here, and ran the unit tests in debug mode 
                    // and this code block was never hit... I also changed it significatly from the Java version
                    // to use RemoveWhere and LINQ. 
                    //
                    // TODO: Write a unit test to cover this and make sure this is a good port! -thoward

                    // linq version
                    _shinglesSeen.RemoveWhere(
                        shingle => (shingle.Find(deletedColumnTokens.Contains) != default(Token)));

                    //// initial conversion
                    //var shinglesSeenIterator = _shinglesSeen.ToList();
                    //foreach (var shingle in shinglesSeenIterator)
                    //{
                    //    foreach (var deletedColumnToken in deletedColumnTokens)
                    //    {
                    //        if (shingle.Contains(deletedColumnToken))
                    //        {
                    //            _shinglesSeen.Remove(shingle);
                    //            break;
                    //        }
                    //    }
                    //}

                    // exhausted
                    if (Matrix.Columns.Count < MinimumShingleSize)
                        return null;

                    // create permutations of the matrix it now looks
                    _permutations = Matrix.PermutationIterator();
                }

                NextTokensPermutation();
                return _requestNextToken;
            }

            if (_permutations == null)
                _permutations = Matrix.PermutationIterator();

            if (!_permutations.HasNext())
                return null;

            NextTokensPermutation();

            return _requestNextToken;
        }
 public void Add(BaseFilterQuery query)
 {
     clauses.Add(query);
 }
Beispiel #7
0
 /*
  * <summary>Adds user input for "fuzzification" </summary>
  * <param name="queryString">The string which will be parsed by the analyzer and for which fuzzy variants will be parsed</param>
  * <param name="fieldName"></param>
  * <param name="minSimilarity">The minimum similarity of the term variants (see FuzzyTermEnum)</param>
  * <param name="prefixLength">Length of required common prefix on variant terms (see FuzzyTermEnum)</param>
  */
 public void AddTerms(String queryString, String fieldName, float minSimilarity, int prefixLength)
 {
     fieldVals.Add(new FieldVals(fieldName, minSimilarity, prefixLength, queryString));
 }
Beispiel #8
0
        /// <summary>
        /// This method exists in order to avoid recursive calls to the method
        /// as the complexity of a fairly small matrix then easily would require
        /// a gigabyte sized stack per thread.
        /// </summary>
        /// <param name="reusableToken"></param>
        /// <returns>null if exhausted, instance request_next_token if one more call is required for an answer,
        /// or instance parameter resuableToken.</returns>
        private Token ProduceNextToken(Token reusableToken)
        {
            if (_currentPermuationTokens != null)
            {
                _currentShingleLength++;

                if (_currentShingleLength + _currentPermutationTokensStartOffset <= _currentPermuationTokens.Count &&
                    _currentShingleLength <= MaximumShingleSize)
                {
                    // it is possible to create at least one more shingle of the current matrix permutation

                    if (IsIgnoringSinglePrefixOrSuffixShingle &&
                        _currentShingleLength == 1 &&
                        (_currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsFirst || _currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsLast))
                    {
                        return(GetNextToken(reusableToken));
                    }

                    var termLength = 0;

                    var shingle = new EquatableList <Token>();

                    for (int i = 0; i < _currentShingleLength; i++)
                    {
                        var shingleToken = _currentPermuationTokens[i + _currentPermutationTokensStartOffset];
                        termLength += shingleToken.TermLength();
                        shingle.Add(shingleToken);
                    }
                    if (SpacerCharacter != null)
                    {
                        termLength += _currentShingleLength - 1;
                    }

                    // only produce shingles that not already has been created
                    if (!_shinglesSeen.Add(shingle))
                    {
                        return(_requestNextToken);
                    }

                    // shingle token factory
                    var sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future. ;)
                    foreach (var shingleToken in shingle)
                    {
                        if (SpacerCharacter != null && sb.Length > 0)
                        {
                            sb.Append(SpacerCharacter);
                        }

                        sb.Append(shingleToken.TermBuffer(), 0, shingleToken.TermLength());
                    }

                    reusableToken.SetTermBuffer(sb.ToString());
                    UpdateToken(reusableToken, shingle, _currentPermutationTokensStartOffset, _currentPermutationRows,
                                _currentPermuationTokens);

                    return(reusableToken);
                }

                // it is NOT possible to create one more shingles of the current matrix permutation
                if (_currentPermutationTokensStartOffset < _currentPermuationTokens.Count - 1)
                {
                    // reset shingle size and move one step to the right in the current tokens permutation
                    _currentPermutationTokensStartOffset++;
                    _currentShingleLength = MinimumShingleSize - 1;
                    return(_requestNextToken);
                }


                // todo does this ever occur?
                if (_permutations == null)
                {
                    return(null);
                }

                if (!_permutations.HasNext())
                {
                    // load more data (if available) to the matrix

                    // don't really care, we just read it.
                    if (_input != null)
                    {
                        ReadColumn();
                    }

                    // get rid of resources

                    // delete the first column in the matrix
                    var deletedColumn = Matrix.Columns[0];
                    Matrix.Columns.RemoveAt(0);

                    // remove all shingles seen that include any of the tokens from the deleted column.
                    var deletedColumnTokens = deletedColumn.Rows.SelectMany(row => row.Tokens).ToList();

                    // I'm a little concerned about this part of the code, because the unit tests currently
                    // don't cover this scenario. (I put a break point here, and ran the unit tests in debug mode
                    // and this code block was never hit... I also changed it significatly from the Java version
                    // to use RemoveWhere and LINQ.
                    //
                    // TODO: Write a unit test to cover this and make sure this is a good port! -thoward

                    // linq version
                    _shinglesSeen.RemoveWhere(
                        shingle => (shingle.Find(deletedColumnTokens.Contains) != default(Token)));

                    //// initial conversion
                    //var shinglesSeenIterator = _shinglesSeen.ToList();
                    //foreach (var shingle in shinglesSeenIterator)
                    //{
                    //    foreach (var deletedColumnToken in deletedColumnTokens)
                    //    {
                    //        if (shingle.Contains(deletedColumnToken))
                    //        {
                    //            _shinglesSeen.Remove(shingle);
                    //            break;
                    //        }
                    //    }
                    //}

                    // exhausted
                    if (Matrix.Columns.Count < MinimumShingleSize)
                    {
                        return(null);
                    }

                    // create permutations of the matrix it now looks
                    _permutations = Matrix.PermutationIterator();
                }

                NextTokensPermutation();
                return(_requestNextToken);
            }

            if (_permutations == null)
            {
                _permutations = Matrix.PermutationIterator();
            }

            if (!_permutations.HasNext())
            {
                return(null);
            }

            NextTokensPermutation();

            return(_requestNextToken);
        }