public virtual void TestCtor()
{
    Token t = new Token();
    char[] content = "hello".ToCharArray();
    t.SetTermBuffer(content, 0, content.Length);
    char[] buf = t.TermBuffer();
    // SetTermBuffer copies the input, so the internal buffer must be a different array
    Assert.AreNotSame(buf, content);
    Assert.AreEqual("hello", t.Term);
    Assert.AreEqual("word", t.Type);
    Assert.AreEqual(0, t.Flags);

    t = new Token(6, 22);
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreEqual("hello", t.Term);
    Assert.AreEqual("(hello,6,22)", t.ToString());
    Assert.AreEqual("word", t.Type);
    Assert.AreEqual(0, t.Flags);

    t = new Token(6, 22, 7);
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreEqual("hello", t.Term);
    Assert.AreEqual("(hello,6,22)", t.ToString());
    Assert.AreEqual(7, t.Flags);

    t = new Token(6, 22, "junk");
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreEqual("hello", t.Term);
    Assert.AreEqual("(hello,6,22,type=junk)", t.ToString());
    Assert.AreEqual(0, t.Flags);
}
public virtual void TestMixedStringArray()
{
    Token t = new Token("hello", 0, 5);
    Assert.AreEqual(t.TermLength(), 5);
    Assert.AreEqual(t.Term, "hello");

    t.SetTermBuffer("hello2");
    Assert.AreEqual(t.TermLength(), 6);
    Assert.AreEqual(t.Term, "hello2");

    t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
    Assert.AreEqual(t.Term, "hello3");

    // modifying the returned buffer must be reflected in Term
    char[] buffer = t.TermBuffer();
    buffer[1] = 'o';
    Assert.AreEqual(t.Term, "hollo3");
}
public virtual void TestToString()
{
    char[] b = new char[]{'a', 'l', 'o', 'h', 'a'};
    Token t = new Token("", 0, 5);
    t.SetTermBuffer(b, 0, 5);
    Assert.AreEqual("(aloha,0,5)", t.ToString());

    t.SetTermText("hi there");
    Assert.AreEqual("(hi there,0,5)", t.ToString());
}
public virtual void TestResize()
{
    Token t = new Token();
    char[] content = "hello".ToCharArray();
    t.SetTermBuffer(content, 0, content.Length);
    for (int i = 0; i < 2000; i++)
    {
        t.ResizeTermBuffer(i);
        Assert.IsTrue(i <= t.TermBuffer().Length);
        Assert.AreEqual("hello", t.Term);
    }
}
public override Token Next(Token result)
{
    result = input.Next(result);
    if (result != null)
    {
        if (stemmer.Stem(result.TermBuffer(), 0, result.TermLength()))
            result.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());
        return result;
    }
    else
        return null;
}
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    Token nextToken = input.Next(reusableToken);
    if (nextToken == null)
        return null;

    if (stemmer.Stem(nextToken.TermBuffer(), 0, nextToken.TermLength()))
        nextToken.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());

    return nextToken;
}
public override Token Next(Token token)
{
    token.Clear();
    if (start == 0)
    {
        length = input.Read((System.Char[]) ioBuffer, 0, ioBuffer.Length);
        if (length <= 0)
            return null;
    }
    if (start == length)
        return null;

    // emit one character per token, lowercased in place via the public accessor
    token.SetTermBuffer(ioBuffer, start, 1);
    start++;
    token.TermBuffer()[0] = System.Char.ToLower(token.TermBuffer()[0]);
    return token;
}
public virtual void TestMixedStringArray()
{
    Token t = new Token("hello", 0, 5);
    Assert.AreEqual(t.TermText(), "hello");
    Assert.AreEqual(t.TermLength(), 5);
    Assert.AreEqual(new System.String(t.TermBuffer(), 0, 5), "hello");

    t.SetTermText("hello2");
    Assert.AreEqual(t.TermLength(), 6);
    Assert.AreEqual(new System.String(t.TermBuffer(), 0, 6), "hello2");

    t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
    Assert.AreEqual(t.TermText(), "hello3");

    // Make sure if we get the buffer and change a character
    // that termText() reflects the change
    char[] buffer = t.TermBuffer();
    buffer[1] = 'o';
    Assert.AreEqual(t.TermText(), "hollo3");
}
public virtual void TestTermBufferEquals()
{
    Token t1a = new Token();
    char[] content1a = "hello".ToCharArray();
    t1a.SetTermBuffer(content1a, 0, 5);

    Token t1b = new Token();
    char[] content1b = "hello".ToCharArray();
    t1b.SetTermBuffer(content1b, 0, 5);

    Token t2 = new Token();
    char[] content2 = "hello2".ToCharArray();
    t2.SetTermBuffer(content2, 0, 6);

    Assert.IsTrue(t1a.Equals(t1b));
    Assert.IsFalse(t1a.Equals(t2));
    Assert.IsFalse(t2.Equals(t1b));
}
public virtual void TestClone()
{
    Token t = new Token(0, 5);
    char[] content = "hello".ToCharArray();
    t.SetTermBuffer(content, 0, 5);
    char[] buf = t.TermBuffer();

    Token copy = (Token) TestSimpleAttributeImpls.AssertCloneIsEqual(t);
    Assert.AreEqual(t.Term, copy.Term);
    Assert.AreNotSame(buf, copy.TermBuffer());

    Payload pl = new Payload(new byte[] { 1, 2, 3, 4 });
    t.Payload = pl;
    copy = (Token) TestSimpleAttributeImpls.AssertCloneIsEqual(t);
    Assert.AreEqual(pl, copy.Payload);
    Assert.AreNotSame(pl, copy.Payload);
}
public virtual void TestMixedStringArray()
{
    Token t = new Token("hello", 0, 5);
    Assert.AreEqual(t.TermText(), "hello");
    Assert.AreEqual(t.TermLength(), 5);
    Assert.AreEqual(t.Term(), "hello");

    t.SetTermText("hello2");
    Assert.AreEqual(t.TermLength(), 6);
    Assert.AreEqual(t.Term(), "hello2");

    t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
    Assert.AreEqual(t.TermText(), "hello3");

    // Make sure if we get the buffer and change a character
    // that termText() reflects the change
    char[] buffer = t.TermBuffer();
    buffer[1] = 'o';
    Assert.AreEqual(t.TermText(), "hollo3");
}
public virtual System.Object Clone()
{
    try
    {
        Token t = (Token) base.MemberwiseClone();
        // MemberwiseClone is shallow, so deep-clone the term buffer and payload
        if (termBuffer != null)
        {
            t.termBuffer = null;
            t.SetTermBuffer(termBuffer, 0, termLength);
        }
        if (payload != null)
        {
            t.SetPayload((Payload) payload.Clone());
        }
        return t;
    }
    catch (System.Exception e)
    {
        throw new System.SystemException("", e); // shouldn't happen
    }
}
public override Token Next(Token result)
{
    result = input.Next(result);
    if (result == null)
        return null;

    char[] buffer = result.TermBuffer();
    int length = result.TermLength();

    // If no characters actually require rewriting then we
    // just return the token as-is:
    for (int i = 0; i < length; i++)
    {
        char c = buffer[i];
        if (c >= '\u00c0' && c <= '\u0178')
        {
            RemoveAccents(buffer, length);
            result.SetTermBuffer(output, 0, outputPos);
            break;
        }
    }
    return result;
}
private Token TokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset)
{
    Token token = new Token(startOffset, endOffset);
    token.SetTermBuffer(text);
    token.PositionIncrement = posIncr;
    ShingleMatrixFilter.DefaultSettingsCodec.SetWeight(token, weight);
    return token;
}
public virtual void TestGrow()
{
    Token t = new Token();
    System.Text.StringBuilder buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        char[] content = buf.ToString().ToCharArray();
        t.SetTermBuffer(content, 0, content.Length);
        Assert.AreEqual(buf.Length, t.TermLength());
        Assert.AreEqual(buf.ToString(), t.Term);
        buf.Append(buf.ToString());
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1048576, t.TermBuffer().Length);

    // now as a string, first variant
    t = new Token();
    buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content, 0, content.Length);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append(content);
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1048576, t.TermBuffer().Length);

    // now as a string, second variant
    t = new Token();
    buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append(content);
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1048576, t.TermBuffer().Length);

    // Test for slow growth to a long term
    t = new Token();
    buf = new System.Text.StringBuilder("a");
    for (int i = 0; i < 20000; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append("a");
    }
    Assert.AreEqual(20000, t.TermLength());
    Assert.AreEqual(32768, t.TermBuffer().Length);

    // Test for slow growth to a long term
    t = new Token();
    buf = new System.Text.StringBuilder("a");
    for (int i = 0; i < 20000; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append("a");
    }
    Assert.AreEqual(20000, t.TermLength());
    Assert.AreEqual(32768, t.TermBuffer().Length);
}
private static Token CreateToken(String term, int start, int offset)
{
    var token = new Token(start, offset);
    token.SetTermBuffer(term);
    return token;
}
protected internal void AddToken(float score)
{
    if (NumTokens < MAX_NUM_TOKENS_PER_GROUP)
    {
        int termStartOffset = offsetAtt.StartOffset;
        int termEndOffset = offsetAtt.EndOffset;
        if (NumTokens == 0)
        {
            startOffset = MatchStartOffset = termStartOffset;
            endOffset = MatchEndOffset = termEndOffset;
            tot += score;
        }
        else
        {
            startOffset = Math.Min(startOffset, termStartOffset);
            endOffset = Math.Max(endOffset, termEndOffset);
            if (score > 0)
            {
                if (tot == 0)
                {
                    MatchStartOffset = offsetAtt.StartOffset;
                    MatchEndOffset = offsetAtt.EndOffset;
                }
                else
                {
                    MatchStartOffset = Math.Min(MatchStartOffset, termStartOffset);
                    MatchEndOffset = Math.Max(MatchEndOffset, termEndOffset);
                }
                tot += score;
            }
        }
        Token token = new Token(termStartOffset, termEndOffset);
        token.SetTermBuffer(termAtt.Term);
        tokens[NumTokens] = token;
        scores[NumTokens] = score;
        NumTokens++;
    }
}
private Token GetNextSuffixInputToken(Token token)
{
    if (!Suffix.IncrementToken())
        return null;

    token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
    token.SetPositionIncrement(_posIncrAtt.GetPositionIncrement());
    token.SetFlags(_flagsAtt.GetFlags());
    token.SetOffset(_offsetAtt.StartOffset(), _offsetAtt.EndOffset());
    token.SetType(_typeAtt.Type());
    token.SetPayload(_payloadAtt.GetPayload());
    return token;
}
public void SetTermBuffer(char[] buffer, int offset, int length)
{
    delegate_Renamed.SetTermBuffer(buffer, offset, length);
}
private static Token TokenFactory(String text, int posIncr, int startOffset, int endOffset)
{
    var token = new Token(startOffset, endOffset);
    token.SetTermBuffer(text);
    token.SetPositionIncrement(posIncr);
    return token;
}
/// <summary>
/// Low level api.
/// Returns a token stream or null if no offset info available in index.
/// This can be used to feed the highlighter with a pre-parsed token stream.
///
/// In my tests the speeds to recreate 1000 token streams using this method are:
/// - with TermVector offset only data stored - 420 milliseconds
/// - with TermVector offset AND position data stored - 271 milliseconds
///   (nb timings for TermVector with position data are based on a tokenizer with contiguous
///   positions - no overlaps or gaps)
/// The cost of not using TermPositionVector to store pre-parsed content and using
/// an analyzer to re-parse the original content:
/// - reanalyzing the original content - 980 milliseconds
///
/// The re-analyze timings will typically vary depending on -
/// 1) The complexity of the analyzer code (timings above were using a
///    stemmer/lowercaser/stopword combo)
/// 2) The number of other fields (Lucene reads ALL fields off the disk
///    when accessing just one document field - can cost dear!)
/// 3) Use of compression on field storage - could be faster due to compression (less disk IO)
///    or slower (more CPU burn) depending on the content.
/// </summary>
/// <param name="tpv"/>
/// <param name="tokenPositionsGuaranteedContiguous">true if the token position numbers have no overlaps or gaps. If looking
/// to eke out the last drops of performance, set to true. If in doubt, set to false.</param>
public static TokenStream GetTokenStream(TermPositionVector tpv, bool tokenPositionsGuaranteedContiguous)
{
    // code to reconstruct the original sequence of Tokens
    String[] terms = tpv.GetTerms();
    int[] freq = tpv.GetTermFrequencies();
    int totalTokens = freq.Sum();
    var tokensInOriginalOrder = new Token[totalTokens];
    List<Token> unsortedTokens = null;
    for (int t = 0; t < freq.Length; t++)
    {
        TermVectorOffsetInfo[] offsets = tpv.GetOffsets(t);
        if (offsets == null)
        {
            return null;
        }

        int[] pos = null;
        if (tokenPositionsGuaranteedContiguous)
        {
            // try to get the token position info to speed up assembly of tokens into sorted sequence
            pos = tpv.GetTermPositions(t);
        }

        if (pos == null)
        {
            // tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
            if (unsortedTokens == null)
            {
                unsortedTokens = new List<Token>();
            }
            foreach (TermVectorOffsetInfo t1 in offsets)
            {
                var token = new Token(t1.StartOffset, t1.EndOffset);
                token.SetTermBuffer(terms[t]);
                unsortedTokens.Add(token);
            }
        }
        else
        {
            // We have positions stored and a guarantee that the token position information is contiguous.
            // This may be fast BUT won't work if Tokenizers are used which create >1 token in the same
            // position or create jumps in position numbers - this code would fail under those circumstances.
            // Tokens stored with positions - can use this to index straight into the sorted array.
            for (int tp = 0; tp < pos.Length; tp++)
            {
                var token = new Token(terms[t], offsets[tp].StartOffset, offsets[tp].EndOffset);
                tokensInOriginalOrder[pos[tp]] = token;
            }
        }
    }

    // If the field has been stored without position data we must perform a sort
    if (unsortedTokens != null)
    {
        tokensInOriginalOrder = unsortedTokens.ToArray();
        Array.Sort(tokensInOriginalOrder, (t1, t2) =>
        {
            if (t1.StartOffset > t2.StartOffset)
                return 1;
            if (t1.StartOffset < t2.StartOffset)
                return -1;
            return 0;
        });
    }
    return new StoredTokenStream(tokensInOriginalOrder);
}
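// Usage sketch (an assumption for illustration, not part of the original source):
// one plausible way a caller might drive the low-level API above. The reader and
// docId inputs, the "contents" field name, and the helper name are hypothetical;
// the field must have been indexed with term vectors that store offsets.
private static TokenStream GetStoredTokenStream(IndexReader reader, int docId)
{
    // GetTermFreqVector returns null if no term vector was stored; the cast
    // yields null if the vector lacks position/offset support
    var tpv = reader.GetTermFreqVector(docId, "contents") as TermPositionVector;
    if (tpv == null)
        return null;

    // false = make no assumption about contiguous positions; slower but always
    // safe, per the remarks on tokenPositionsGuaranteedContiguous above
    return GetTokenStream(tpv, false);
}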
public override Token Next(Token result)
{
    if (buffered != null)
    {
        Token t = buffered;
        buffered = null;
        return t;
    }

    Token t2 = input.Next(result);
    if (t2 == null)
        return null;

    if (System.Char.IsDigit(t2.TermBuffer()[0]))
    {
        t2.SetPositionIncrement(t2.TermBuffer()[0] - '0');
    }

    if (first)
    {
        // set payload on first position only
        t2.SetPayload(new Payload(new byte[]{100}));
        first = false;
    }

    // index a "synonym" for every token
    buffered = (Token) t2.Clone();
    buffered.SetPayload(null);
    buffered.SetPositionIncrement(0);
    buffered.SetTermBuffer(new char[]{'b'}, 0, 1);

    return t2;
}
private static Token TokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset, TokenPositioner positioner)
{
    var token = new Token(startOffset, endOffset);
    token.SetTermBuffer(text);
    token.SetPositionIncrement(posIncr);
    ShingleMatrixFilter.DefaultSettingsCodec.SetWeight(token, weight);
    ShingleMatrixFilter.DefaultSettingsCodec.SetTokenPositioner(token, positioner);
    return token;
}
public virtual void TestCopyTo()
{
    Token t = new Token();
    Token copy = (Token) TestSimpleAttributeImpls.AssertCopyIsEqual(t);
    Assert.AreEqual("", t.Term);
    Assert.AreEqual("", copy.Term);

    t = new Token(0, 5);
    char[] content = "hello".ToCharArray();
    t.SetTermBuffer(content, 0, 5);
    char[] buf = t.TermBuffer();
    copy = (Token) TestSimpleAttributeImpls.AssertCopyIsEqual(t);
    Assert.AreEqual(t.Term, copy.Term);
    Assert.AreNotSame(buf, copy.TermBuffer());

    Payload pl = new Payload(new byte[]{1, 2, 3, 4});
    t.Payload = pl;
    copy = (Token) TestSimpleAttributeImpls.AssertCopyIsEqual(t);
    Assert.AreEqual(pl, copy.Payload);
    Assert.AreNotSame(pl, copy.Payload);
}
private Token GetNextInputToken(Token token)
{
    if (!_input.IncrementToken())
        return null;

    token.SetTermBuffer(_inTermAtt.TermBuffer(), 0, _inTermAtt.TermLength());
    token.SetPositionIncrement(_inPosIncrAtt.GetPositionIncrement());
    token.SetFlags(_inFlagsAtt.GetFlags());
    token.SetOffset(_inOffsetAtt.StartOffset(), _inOffsetAtt.EndOffset());
    token.SetType(_inTypeAtt.Type());
    token.SetPayload(_inPayloadAtt.GetPayload());
    return token;
}
private Token TokenFactory(String text, int posIncr, int startOffset, int endOffset)
{
    Token token = new Token(startOffset, endOffset);
    token.SetTermBuffer(text);
    token.PositionIncrement = posIncr;
    return token;
}
public virtual void TestGrow()
{
    Token t = new Token();
    System.Text.StringBuilder buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        char[] content = buf.ToString().ToCharArray();
        t.SetTermBuffer(content, 0, content.Length);
        Assert.AreEqual(buf.Length, t.TermLength());
        Assert.AreEqual(buf.ToString(), t.Term);
        buf.Append(buf.ToString());
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1179654, t.TermBuffer().Length);

    // now as a string, first variant
    t = new Token();
    buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content, 0, content.Length);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append(content);
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1179654, t.TermBuffer().Length);

    // now as a string, second variant
    t = new Token();
    buf = new System.Text.StringBuilder("ab");
    for (int i = 0; i < 20; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append(content);
    }
    Assert.AreEqual(1048576, t.TermLength());
    Assert.AreEqual(1179654, t.TermBuffer().Length);

    // Test for slow growth to a long term
    t = new Token();
    buf = new System.Text.StringBuilder("a");
    for (int i = 0; i < 20000; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append("a");
    }
    Assert.AreEqual(20000, t.TermLength());
    Assert.AreEqual(20167, t.TermBuffer().Length);

    // Test for slow growth to a long term
    t = new Token();
    buf = new System.Text.StringBuilder("a");
    for (int i = 0; i < 20000; i++)
    {
        System.String content = buf.ToString();
        t.SetTermBuffer(content);
        Assert.AreEqual(content.Length, t.TermLength());
        Assert.AreEqual(content, t.Term);
        buf.Append("a");
    }
    Assert.AreEqual(20000, t.TermLength());
    Assert.AreEqual(20167, t.TermBuffer().Length);
}
public override bool IncrementToken()
{
    if (currentRealToken == null)
    {
        bool next = realStream.IncrementToken();
        if (!next)
        {
            return false;
        }
        //Token nextRealToken = new Token(, offsetAtt.startOffset(), offsetAtt.endOffset());
        ClearAttributes();
        termAtt.SetTermBuffer(realTermAtt.Term);
        offsetAtt.SetOffset(realOffsetAtt.StartOffset, realOffsetAtt.EndOffset);
        posIncrAtt.PositionIncrement = realPosIncrAtt.PositionIncrement;

        String expansions = synonyms[realTermAtt.Term];
        if (expansions == null)
        {
            return true;
        }
        st = new Tokenizer(expansions, ",");
        if (st.HasMoreTokens())
        {
            currentRealToken = new Token(realOffsetAtt.StartOffset, realOffsetAtt.EndOffset);
            currentRealToken.SetTermBuffer(realTermAtt.Term);
        }
        return true;
    }
    else
    {
        String tok = st.NextToken();
        ClearAttributes();
        termAtt.SetTermBuffer(tok);
        offsetAtt.SetOffset(currentRealToken.StartOffset, currentRealToken.EndOffset);
        // expansions are synonyms, so they occupy the same position as the real token
        posIncrAtt.PositionIncrement = 0;
        if (!st.HasMoreTokens())
        {
            currentRealToken = null;
            st = null;
        }
        return true;
    }
}
/// <summary>
/// This method exists in order to avoid recursive calls to the method,
/// as the complexity of even a fairly small matrix would otherwise easily
/// require a gigabyte sized stack per thread.
/// </summary>
/// <param name="reusableToken"></param>
/// <returns>null if exhausted, the instance _requestNextToken if one more call is required for an answer,
/// or the instance parameter reusableToken.</returns>
private Token ProduceNextToken(Token reusableToken)
{
    if (_currentPermuationTokens != null)
    {
        _currentShingleLength++;

        if (_currentShingleLength + _currentPermutationTokensStartOffset <= _currentPermuationTokens.Count
            && _currentShingleLength <= MaximumShingleSize)
        {
            // it is possible to create at least one more shingle of the current matrix permutation
            if (IsIgnoringSinglePrefixOrSuffixShingle
                && _currentShingleLength == 1
                && (_currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsFirst
                    || _currentPermutationRows[_currentPermutationTokensStartOffset].Column.IsLast))
            {
                return Next();
            }

            var termLength = 0;
            var shingle = new EquatableList<Token>();

            for (int i = 0; i < _currentShingleLength; i++)
            {
                var shingleToken = _currentPermuationTokens[i + _currentPermutationTokensStartOffset];
                termLength += shingleToken.TermLength();
                shingle.Add(shingleToken);
            }

            if (SpacerCharacter != null)
                termLength += _currentShingleLength - 1;

            // only produce shingles that have not already been created
            if (!_shinglesSeen.Add(shingle))
                return _requestNextToken;

            // shingle token factory
            var sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future. ;)
            foreach (var shingleToken in shingle)
            {
                if (SpacerCharacter != null && sb.Length > 0)
                    sb.Append(SpacerCharacter);
                sb.Append(shingleToken.TermBuffer(), 0, shingleToken.TermLength());
            }

            reusableToken.SetTermBuffer(sb.ToString());
            UpdateToken(reusableToken, shingle, _currentPermutationTokensStartOffset, _currentPermutationRows, _currentPermuationTokens);
            return reusableToken;
        }

        // it is NOT possible to create any more shingles of the current matrix permutation
        if (_currentPermutationTokensStartOffset < _currentPermuationTokens.Count - 1)
        {
            // reset shingle size and move one step to the right in the current tokens permutation
            _currentPermutationTokensStartOffset++;
            _currentShingleLength = MinimumShingleSize - 1;
            return _requestNextToken;
        }

        // todo: does this ever occur?
        if (_permutations == null)
            return null;

        if (!_permutations.HasNext())
        {
            // load more data (if available) to the matrix
            // don't really care about the result, we just read it.
            if (_input != null)
                ReadColumn();

            // get rid of resources
            // delete the first column in the matrix
            var deletedColumn = Matrix.Columns[0];
            Matrix.Columns.RemoveAt(0);

            // remove all shingles seen that include any of the tokens from the deleted column.
            var deletedColumnTokens = deletedColumn.Rows.SelectMany(row => row.Tokens).ToList();

            // I'm a little concerned about this part of the code, because the unit tests currently
            // don't cover this scenario. (I put a break point here, and ran the unit tests in debug mode
            // and this code block was never hit... I also changed it significantly from the Java version
            // to use RemoveWhere and LINQ.)
            //
            // TODO: Write a unit test to cover this and make sure this is a good port! -thoward

            // linq version
            _shinglesSeen.RemoveWhere(shingle => (shingle.Find(deletedColumnTokens.Contains) != default(Token)));

            //// initial conversion
            //var shinglesSeenIterator = _shinglesSeen.ToList();
            //foreach (var shingle in shinglesSeenIterator)
            //{
            //    foreach (var deletedColumnToken in deletedColumnTokens)
            //    {
            //        if (shingle.Contains(deletedColumnToken))
            //        {
            //            _shinglesSeen.Remove(shingle);
            //            break;
            //        }
            //    }
            //}

            // exhausted
            if (Matrix.Columns.Count < MinimumShingleSize)
                return null;

            // create permutations of the matrix as it now looks
            _permutations = Matrix.PermutationIterator();
        }

        NextTokensPermutation();
        return _requestNextToken;
    }

    if (_permutations == null)
        _permutations = Matrix.PermutationIterator();

    if (!_permutations.HasNext())
        return null;

    NextTokensPermutation();
    return _requestNextToken;
}
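// Caller sketch (an assumption for illustration, not the filter's actual Next()
// implementation): the sentinel contract documented above is consumed by looping
// until something other than _requestNextToken comes back, which is what turns
// the would-be recursion into iteration. The method name is hypothetical.
private Token NextTokenFromMatrix(Token reusableToken)
{
    Token token;
    do
    {
        // each call advances the shingle/permutation state by exactly one step
        token = ProduceNextToken(reusableToken);
    } while (token == _requestNextToken);
    return token; // null when the matrix is exhausted, otherwise reusableToken
}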