/// <summary>
/// Creates an enumeration over the terms delivered by <paramref name="terms"/> that share a prefix of
/// length <paramref name="prefixLength"/> with <paramref name="term"/> and whose fuzzy similarity is
/// greater than <paramref name="minSimilarity"/>.
/// <para/>
/// When the constructor returns, the enumeration is already positioned on the first
/// valid term, if such a term exists.
/// </summary>
/// <param name="terms"> Delivers terms. </param>
/// <param name="atts"> <see cref="AttributeSource"/> created by the rewrite method of <see cref="MultiTermQuery"/>
///        that contains information about competitive boosts during rewrite. It is also used
///        to cache DFAs between segment transitions. </param>
/// <param name="term"> Pattern term. </param>
/// <param name="minSimilarity"> Minimum required similarity for terms from the reader. Pass an integer value
///        representing edit distance. Passing a fraction is deprecated. </param>
/// <param name="prefixLength"> Length of required common prefix. Values larger than the number of
///        code points in the pattern term are clamped to that number. </param>
/// <param name="transpositions"> Whether transpositions are enabled; stored on the instance and
///        checked against <see cref="LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE"/>. </param>
/// <exception cref="ArgumentException"> if <paramref name="minSimilarity"/> is a non-integral value &gt;= 1,
///         if <paramref name="minSimilarity"/> is negative, or if <paramref name="prefixLength"/> is negative </exception>
/// <exception cref="NotSupportedException"> if <paramref name="transpositions"/> is <c>true</c> and the resulting
///         maximum edit distance exceeds <see cref="LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE"/> </exception>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, float minSimilarity, int prefixLength, bool transpositions)
{
    InitializeInstanceFields();

    // Values of 1 or more are interpreted as a raw edit count and therefore must be whole numbers.
    bool isEditCount = minSimilarity >= 1.0f;
    if (isEditCount && minSimilarity != (int)minSimilarity)
    {
        throw new ArgumentException("fractional edit distances are not allowed");
    }
    if (minSimilarity < 0.0f)
    {
        throw new ArgumentException("minimumSimilarity cannot be less than 0");
    }
    if (prefixLength < 0)
    {
        throw new ArgumentException("prefixLength cannot be less than 0");
    }

    this.m_terms = terms;
    this.term = term;

    // Decode the UTF-16 pattern into one int per code point (UTF-32) for fast comparisons.
    string patternText = term.Text();
    this.m_termText = new int[patternText.CodePointCount(0, patternText.Length)];
    int charIndex = 0;
    int slot = 0;
    while (charIndex < patternText.Length)
    {
        int codePoint = patternText.CodePointAt(charIndex);
        m_termText[slot] = codePoint;
        slot++;
        // Advance by one or two chars depending on whether the code point needed a surrogate pair.
        charIndex += Character.CharCount(codePoint);
    }
    this.m_termLength = m_termText.Length;
    this.dfaAtt = atts.AddAttribute<ILevenshteinAutomataAttribute>();

    // The requested prefix may be longer than the word itself. That is a little silly,
    // but it simply means the entire word must match, so clamp to the term length.
    if (prefixLength > m_termLength)
    {
        this.m_realPrefixLength = m_termLength;
    }
    else
    {
        this.m_realPrefixLength = prefixLength;
    }

    if (isEditCount)
    {
        // Driven purely by the number of edits; similarity plays no role.
        this.m_minSimilarity = 0;
        m_maxEdits = (int)minSimilarity;
        m_raw = true;
    }
    else
    {
        // Fractional similarity: derive the maximum number of edits from it.
        this.m_minSimilarity = minSimilarity;
        m_maxEdits = InitialMaxDistance(this.m_minSimilarity, m_termLength);
        m_raw = false;
    }

    if (transpositions && m_maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
    {
        throw new NotSupportedException("with transpositions enabled, distances > " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
    }
    this.transpositions = transpositions;
    this.m_scaleFactor = 1.0f / (1.0f - this.m_minSimilarity);

    // Pick up the current competitive-boost state, then run the initial bottom update.
    this.maxBoostAtt = atts.AddAttribute<IMaxNonCompetitiveBoostAttribute>();
    bottom = maxBoostAtt.MaxNonCompetitiveBoost;
    bottomTerm = maxBoostAtt.CompetitiveTerm;
    BottomChanged(null, true);
}
/// <summary>
/// Constructor for enumeration of all terms from specified <c>reader</c> which share a prefix of
/// length <paramref name="prefixLength"/> with <paramref name="term"/> and which have a fuzzy similarity
/// greater than <paramref name="minSimilarity"/>.
/// <para/>
/// After calling the constructor the enumeration is already pointing to the first
/// valid term if such a term exists.
/// </summary>
/// <param name="terms"> Delivers terms. </param>
/// <param name="atts"> <see cref="AttributeSource"/> created by the rewrite method of <see cref="MultiTermQuery"/>
///        that contains information about competitive boosts during rewrite. It is also used
///        to cache DFAs between segment transitions. </param>
/// <param name="term"> Pattern term. </param>
/// <param name="minSimilarity"> Minimum required similarity for terms from the reader. Pass an integer value
///        representing edit distance. Passing a fraction is deprecated. </param>
/// <param name="prefixLength"> Length of required common prefix. Values larger than the number of
///        code points in the pattern term are clamped to that number. </param>
/// <param name="transpositions"> Whether transpositions are enabled; stored on the instance and
///        checked against <see cref="LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE"/>. </param>
/// <exception cref="System.ArgumentException"> if <paramref name="minSimilarity"/> is a non-integral value &gt;= 1,
///         if <paramref name="minSimilarity"/> is negative, or if <paramref name="prefixLength"/> is negative </exception>
/// <exception cref="System.NotSupportedException"> if <paramref name="transpositions"/> is <c>true</c> and the resulting
///         maximum edit distance exceeds <see cref="LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE"/> </exception>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, float minSimilarity, int prefixLength, bool transpositions)
{
    if (!InstanceFieldsInitialized)
    {
        InitializeInstanceFields();
        InstanceFieldsInitialized = true;
    }

    // Values >= 1 are interpreted as a raw edit count and therefore must be whole numbers.
    if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity)
    {
        throw new System.ArgumentException("fractional edit distances are not allowed");
    }
    if (minSimilarity < 0.0f)
    {
        throw new System.ArgumentException("minimumSimilarity cannot be less than 0");
    }
    if (prefixLength < 0)
    {
        throw new System.ArgumentException("prefixLength cannot be less than 0");
    }

    this.Terms = terms;
    this.Term_Renamed = term;

    // convert the string into a utf32 int[] representation for fast comparisons
    string utf16 = term.Text();

    // Size the array by the number of code points, not the number of UTF-16 chars.
    // Sizing it by utf16.Length leaves trailing zero entries and inflates TermLength
    // whenever the term contains a surrogate pair. The count walks the string exactly
    // the way the fill loop below does, so the two always agree.
    int codePointCount = 0;
    for (int i = 0; i < utf16.Length; i += Character.CharCount(Character.CodePointAt(utf16, i)))
    {
        codePointCount++;
    }
    this.TermText = new int[codePointCount];
    for (int cp, i = 0, j = 0; i < utf16.Length; i += Character.CharCount(cp))
    {
        TermText[j++] = cp = Character.CodePointAt(utf16, i);
    }
    this.TermLength = TermText.Length;
    this.DfaAtt = atts.AddAttribute<ILevenshteinAutomataAttribute>();

    //The prefix could be longer than the word.
    //It's kind of silly though.  It means we must match the entire word.
    this.RealPrefixLength = prefixLength > TermLength ? TermLength : prefixLength;

    // if minSimilarity >= 1, we treat it as number of edits
    if (minSimilarity >= 1f)
    {
        this.MinSimilarity_Renamed = 0; // just driven by number of edits
        MaxEdits = (int)minSimilarity;
        Raw = true;
    }
    else
    {
        this.MinSimilarity_Renamed = minSimilarity;
        // calculate the maximum k edits for this similarity
        MaxEdits = InitialMaxDistance(this.MinSimilarity_Renamed, TermLength);
        Raw = false;
    }

    if (transpositions && MaxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)
    {
        throw new System.NotSupportedException("with transpositions enabled, distances > " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
    }
    this.Transpositions = transpositions;
    this.Scale_factor = 1.0f / (1.0f - this.MinSimilarity_Renamed);

    // Pick up the current competitive-boost state, then run the initial bottom update.
    this.MaxBoostAtt = atts.AddAttribute<IMaxNonCompetitiveBoostAttribute>();
    Bottom = MaxBoostAtt.MaxNonCompetitiveBoost;
    BottomTerm = MaxBoostAtt.CompetitiveTerm;
    BottomChanged(null, true);
}