示例#1
0
        /// <summary>
        /// This method exists in order to avoid recursive calls to the method
        /// as the complexity of a fairly small matrix then easily would require
        /// a gigabyte sized stack per thread.
        /// </summary>
        /// <param name="reusableToken"></param>
        /// <returns>null if exhausted, instance request_next_token if one more call is required for an answer,
        /// or instance parameter resuableToken.</returns>
        private Token ProduceNextToken(Token reusableToken)
        {
            if (_currentPermuationTokens != null)
            {
                _currentShingleLength++;

                if (_currentShingleLength + _currentPermutationTokensStartOffset <= _currentPermuationTokens.Count &&
                    _currentShingleLength <= MaximumShingleSize)
                {
                    // it is possible to create at least one more shingle of the current matrix permutation

                    if (IsIgnoringSinglePrefixOrSuffixShingle &&
                        _currentShingleLength == 1 &&
                        (_currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsFirst || _currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsLast))
                    {
                        return(GetNextToken(reusableToken));
                    }

                    var termLength = 0;

                    var shingle = new EquatableList <Token>();

                    for (int i = 0; i < _currentShingleLength; i++)
                    {
                        var shingleToken = _currentPermuationTokens[i + _currentPermutationTokensStartOffset];
                        termLength += shingleToken.TermLength();
                        shingle.Add(shingleToken);
                    }
                    if (SpacerCharacter != null)
                    {
                        termLength += _currentShingleLength - 1;
                    }

                    // only produce shingles that not already has been created
                    if (!_shinglesSeen.Add(shingle))
                    {
                        return(_requestNextToken);
                    }

                    // shingle token factory
                    var sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future. ;)
                    foreach (var shingleToken in shingle)
                    {
                        if (SpacerCharacter != null && sb.Length > 0)
                        {
                            sb.Append(SpacerCharacter);
                        }

                        sb.Append(shingleToken.TermBuffer(), 0, shingleToken.TermLength());
                    }

                    reusableToken.SetTermBuffer(sb.ToString());
                    UpdateToken(reusableToken, shingle, _currentPermutationTokensStartOffset, _currentPermutationRows,
                                _currentPermuationTokens);

                    return(reusableToken);
                }

                // it is NOT possible to create one more shingles of the current matrix permutation
                if (_currentPermutationTokensStartOffset < _currentPermuationTokens.Count - 1)
                {
                    // reset shingle size and move one step to the right in the current tokens permutation
                    _currentPermutationTokensStartOffset++;
                    _currentShingleLength = MinimumShingleSize - 1;
                    return(_requestNextToken);
                }


                // todo does this ever occur?
                if (_permutations == null)
                {
                    return(null);
                }

                if (!_permutations.HasNext())
                {
                    // load more data (if available) to the matrix

                    // don't really care, we just read it.
                    if (_input != null)
                    {
                        ReadColumn();
                    }

                    // get rid of resources

                    // delete the first column in the matrix
                    var deletedColumn = Matrix.Columns[0];
                    Matrix.Columns.RemoveAt(0);

                    // remove all shingles seen that include any of the tokens from the deleted column.
                    var deletedColumnTokens = deletedColumn.Rows.SelectMany(row => row.Tokens).ToList();

                    // I'm a little concerned about this part of the code, because the unit tests currently
                    // don't cover this scenario. (I put a break point here, and ran the unit tests in debug mode
                    // and this code block was never hit... I also changed it significatly from the Java version
                    // to use RemoveWhere and LINQ.
                    //
                    // TODO: Write a unit test to cover this and make sure this is a good port! -thoward

                    // linq version
                    _shinglesSeen.RemoveWhere(
                        shingle => (shingle.Find(deletedColumnTokens.Contains) != default(Token)));

                    //// initial conversion
                    //var shinglesSeenIterator = _shinglesSeen.ToList();
                    //foreach (var shingle in shinglesSeenIterator)
                    //{
                    //    foreach (var deletedColumnToken in deletedColumnTokens)
                    //    {
                    //        if (shingle.Contains(deletedColumnToken))
                    //        {
                    //            _shinglesSeen.Remove(shingle);
                    //            break;
                    //        }
                    //    }
                    //}

                    // exhausted
                    if (Matrix.Columns.Count < MinimumShingleSize)
                    {
                        return(null);
                    }

                    // create permutations of the matrix it now looks
                    _permutations = Matrix.PermutationIterator();
                }

                NextTokensPermutation();
                return(_requestNextToken);
            }

            if (_permutations == null)
            {
                _permutations = Matrix.PermutationIterator();
            }

            if (!_permutations.HasNext())
            {
                return(null);
            }

            NextTokensPermutation();

            return(_requestNextToken);
        }
示例#2
0
 public override void Reset()
 {
     _permutations = null;
     _shinglesSeen.Clear();
     _input.Reset();
 }