/// <summary>
/// Advances the shingle state machine by a single step.
/// This method exists in order to avoid recursive calls to the method,
/// as the complexity of even a fairly small matrix would otherwise easily require
/// a gigabyte-sized stack per thread.
/// </summary>
/// <param name="reusableToken">Token instance that receives the shingle term when one is produced.</param>
/// <returns>
/// <c>null</c> if exhausted, the <c>_requestNextToken</c> instance if one more call is required
/// for an answer, or the <paramref name="reusableToken"/> instance holding the produced shingle.
/// </returns>
private Token ProduceNextToken(Token reusableToken)
{
    if (_currentPermuationTokens != null)
    {
        _currentShingleLength++;

        if (_currentShingleLength + _currentPermutationTokensStartOffset <= _currentPermuationTokens.Count
            && _currentShingleLength <= MaximumShingleSize)
        {
            // it is possible to create at least one more shingle of the current matrix permutation

            if (IsIgnoringSinglePrefixOrSuffixShingle
                && _currentShingleLength == 1
                && (_currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsFirst
                    || _currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsLast))
            {
                // a single-token shingle taken from the first or last column is skipped
                return GetNextToken(reusableToken);
            }

            // collect the tokens of this shingle and sum up the length of their terms
            var termLength = 0;
            var shingle = new EquatableList<Token>();

            for (int i = 0; i < _currentShingleLength; i++)
            {
                var shingleToken = _currentPermuationTokens[i + _currentPermutationTokensStartOffset];
                termLength += shingleToken.TermLength();
                shingle.Add(shingleToken);
            }

            if (SpacerCharacter != null)
            {
                // one spacer between each pair of adjacent tokens
                termLength += _currentShingleLength - 1;
            }

            // only produce shingles that have not already been created
            if (!_shinglesSeen.Add(shingle))
            {
                return _requestNextToken;
            }

            // shingle token factory: pre-size the builder to the computed length plus some slack
            var sb = new StringBuilder(termLength + 10);
            foreach (var shingleToken in shingle)
            {
                if (SpacerCharacter != null && sb.Length > 0)
                {
                    sb.Append(SpacerCharacter);
                }

                sb.Append(shingleToken.TermBuffer(), 0, shingleToken.TermLength());
            }

            reusableToken.SetTermBuffer(sb.ToString());
            UpdateToken(reusableToken, shingle, _currentPermutationTokensStartOffset, _currentPermutationRows, _currentPermuationTokens);

            return reusableToken;
        }

        // it is NOT possible to create one more shingle of the current matrix permutation

        if (_currentPermutationTokensStartOffset < _currentPermuationTokens.Count - 1)
        {
            // reset shingle size and move one step to the right in the current tokens permutation
            _currentPermutationTokensStartOffset++;
            _currentShingleLength = MinimumShingleSize - 1;
            return _requestNextToken;
        }

        // todo does this ever occur?
        if (_permutations == null)
        {
            return null;
        }

        if (!_permutations.HasNext())
        {
            // load more data (if available) to the matrix
            // don't really care about the outcome, we just read it.
            if (_input != null)
            {
                ReadColumn();
            }

            // The matrix has no columns left to drop, so there is nothing more to produce.
            // Without this check, indexing the first column of an empty matrix below would fail
            // (e.g. when this method is called again after the stream has been exhausted).
            if (Matrix.Columns.Count == 0)
            {
                return null;
            }

            // get rid of resources

            // delete the first column in the matrix
            var deletedColumn = Matrix.Columns[0];
            Matrix.Columns.RemoveAt(0);

            // remove all shingles seen that include any of the tokens from the deleted column.
            var deletedColumnTokens = deletedColumn.Rows.SelectMany(row => row.Tokens).ToList();

            // I'm a little concerned about this part of the code, because the unit tests currently
            // don't cover this scenario. (I put a break point here, and ran the unit tests in debug mode
            // and this code block was never hit... I also changed it significantly from the Java version
            // to use RemoveWhere and LINQ.
            //
            // TODO: Write a unit test to cover this and make sure this is a good port! -thoward

            // Intended effect: for every shingle seen so far, drop it if it contains
            // at least one token of the deleted column.
            _shinglesSeen.RemoveWhere(
                shingle => (shingle.Find(deletedColumnTokens.Contains) != default(Token)));

            // exhausted
            if (Matrix.Columns.Count < MinimumShingleSize)
            {
                return null;
            }

            // create permutations of the matrix as it now looks
            _permutations = Matrix.PermutationIterator();
        }

        NextTokensPermutation();
        return _requestNextToken;
    }

    // no permutation has been loaded yet: start iterating the permutations of the matrix
    if (_permutations == null)
    {
        _permutations = Matrix.PermutationIterator();
    }

    if (!_permutations.HasNext())
    {
        return null;
    }

    NextTokensPermutation();

    return _requestNextToken;
}
/// <summary>
/// Resets this filter: discards the current permutation iterator, forgets every shingle
/// seen so far and resets the input held in <c>_input</c>.
/// </summary>
public override void Reset()
{
    _permutations = null;
    _shinglesSeen.Clear();

    // ProduceNextToken treats _input as possibly null, so apply the same guard here
    // instead of failing with a NullReferenceException.
    if (_input != null)
    {
        _input.Reset();
    }
}