Example #1
 public SingleCharTokenizer(TokenStream input) : base(input)
 {
     _input                      = input;
     _termAttribute              = (TermAttribute)AddAttribute(typeof(TermAttribute));
     _offsetAttribute            = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
     _positionIncrementAttribute = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
 }
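A minimal sketch (not from the original source) of the matching IncrementToken() override; the _buffer and _index fields are hypothetical state for re-emitting the upstream term one character at a time.

 // Hypothetical fields: private string _buffer; private int _index;
 public override bool IncrementToken()
 {
     if (_buffer == null || _index >= _buffer.Length)
     {
         if (!_input.IncrementToken())
         {
             return false; // upstream stream is exhausted
         }
         _buffer = _termAttribute.Term(); // capture the upstream term
         _index  = 0;
     }

     ClearAttributes();
     _termAttribute.SetTermBuffer(_buffer, _index, 1);    // emit a single character
     _offsetAttribute.SetOffset(_index, _index + 1);      // simplistic offsets for the sketch
     _positionIncrementAttribute.SetPositionIncrement(1); // each char is a new position
     _index++;
     return true;
 }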
Example #2
        /// <summary> Adds term frequencies found by tokenizing text from the reader into the given term-frequency map</summary>
        /// <param name="r">a source of text to be tokenized
        /// </param>
        /// <param name="termFreqMap">a Map of terms and their frequencies
        /// </param>
        /// <param name="fieldName">Used by analyzer for any special per-field analysis
        /// </param>
        private void AddTermFrequencies(System.IO.TextReader r, System.Collections.IDictionary termFreqMap, System.String fieldName)
        {
            TokenStream ts         = analyzer.TokenStream(fieldName, r);
            int         tokenCount = 0;
            // for every token
            TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));

            while (ts.IncrementToken())
            {
                string word = termAtt.Term();
                tokenCount++;
                if (tokenCount > maxNumTokensParsed)
                {
                    break;
                }
                if (IsNoiseWord(word))
                {
                    continue;
                }

                // increment frequency
                Int cnt = (Int)termFreqMap[word];
                if (cnt == null)
                {
                    termFreqMap[word] = new Int();
                }
                else
                {
                    cnt.x++;
                }
            }
        }
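For reference, a minimal sketch of the Int helper used above, modeled on the mutable counter nested in Lucene.Net's MoreLikeThis:

        // A mutable box so the count can be bumped in place
        // without re-inserting into the non-generic IDictionary.
        private class Int
        {
            public int x;

            public Int()
            {
                x = 1; // a freshly inserted term has been seen once
            }
        }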
Example #3
        public virtual void TestToStringAndMultiAttributeImplementations()
        {
            AttributeSource src     = new AttributeSource();
            TermAttribute   termAtt = (TermAttribute)src.AddAttribute(typeof(TermAttribute));
            TypeAttribute   typeAtt = (TypeAttribute)src.AddAttribute(typeof(TypeAttribute));

            termAtt.SetTermBuffer("TestTerm");
            typeAtt.SetType("TestType");
            Assert.AreEqual("(" + termAtt.ToString() + "," + typeAtt.ToString() + ")", src.ToString(), "Attributes should appear in original order");
            System.Collections.Generic.IEnumerator <AttributeImpl> it = src.GetAttributeImplsIterator().GetEnumerator();
            Assert.IsTrue(it.MoveNext(), "Iterator should have 2 attributes left");
            Assert.AreSame(termAtt, it.Current, "First AttributeImpl from iterator should be termAtt");
            Assert.IsTrue(it.MoveNext(), "Iterator should have 1 attributes left");
            Assert.AreSame(typeAtt, it.Current, "Second AttributeImpl from iterator should be typeAtt");
            Assert.IsFalse(it.MoveNext(), "Iterator should have 0 attributes left");

            src = new AttributeSource();
            src.AddAttributeImpl(new Token());
            // this should not add a new attribute as Token implements TermAttribute, too
            termAtt = (TermAttribute)src.AddAttribute(typeof(TermAttribute));
            Assert.IsTrue(termAtt is Token, "TermAttribute should be implemented by Token");
            // get the Token attribute and check, that it is the only one
            it = src.GetAttributeImplsIterator().GetEnumerator();
            Assert.IsTrue(it.MoveNext());
            Token tok = (Token)it.Current;

            Assert.IsFalse(it.MoveNext(), "There should be only one attribute implementation instance");

            termAtt.SetTermBuffer("TestTerm");
            Assert.AreEqual("(" + tok.ToString() + ")", src.ToString(), "Token should only printed once");
        }
Example #4
        protected new void AddTermFrequencies(System.IO.StreamReader r, System.Collections.IDictionary termFreqMap, System.String fieldName)
        {
            var           analyzer   = Analyzers[fieldName];
            TokenStream   ts         = analyzer.TokenStream(fieldName, r);
            TermAttribute termAtt    = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
            int           tokenCount = 0;

            while (ts.IncrementToken())
            {
                // for every token
                System.String word = termAtt.Term();
                tokenCount++;
                if (tokenCount > GetMaxNumTokensParsed())
                {
                    break;
                }
                if (IsNoiseWord(word))
                {
                    continue;
                }

                // increment frequency
                var cnt = (Int)termFreqMap[word];
                if (cnt == null)
                {
                    termFreqMap[word] = new Int();
                }
                else
                {
                    cnt.x++;
                }
            }
        }
Example #5
        public override void  CopyTo(AttributeImpl target)
        {
            InitTermBuffer();
            TermAttribute t = (TermAttribute)target;

            t.SetTermBuffer(termBuffer, 0, termLength);
        }
Example #6
        public virtual void TestCloneAttributes()
        {
            AttributeSource src     = new AttributeSource();
            TermAttribute   termAtt = (TermAttribute)src.AddAttribute(typeof(TermAttribute));
            TypeAttribute   typeAtt = (TypeAttribute)src.AddAttribute(typeof(TypeAttribute));

            termAtt.SetTermBuffer("TestTerm");
            typeAtt.SetType("TestType");

            AttributeSource clone = src.CloneAttributes();

            System.Collections.IEnumerator it = clone.GetAttributeClassesIterator().GetEnumerator();
            Assert.IsTrue(it.MoveNext());
            Assert.AreEqual(typeof(TermAttribute), it.Current, "TermAttribute must be the first attribute");
            Assert.IsTrue(it.MoveNext());
            Assert.AreEqual(typeof(TypeAttribute), it.Current, "TypeAttribute must be the second attribute");
            Assert.IsFalse(it.MoveNext(), "No more attributes");

            TermAttribute termAtt2 = (TermAttribute)clone.GetAttribute(typeof(TermAttribute));
            TypeAttribute typeAtt2 = (TypeAttribute)clone.GetAttribute(typeof(TypeAttribute));

            Assert.IsFalse(ReferenceEquals(termAtt2, termAtt), "TermAttribute of original and clone must be different instances");
            Assert.IsFalse(ReferenceEquals(typeAtt2, typeAtt), "TypeAttribute of original and clone must be different instances");
            Assert.AreEqual(termAtt2, termAtt, "TermAttribute of original and clone must be equal");
            Assert.AreEqual(typeAtt2, typeAtt, "TypeAttribute of original and clone must be equal");
        }
Example #7
        public virtual void TestCaptureState()
        {
            // init a first instance
            AttributeSource src     = new AttributeSource();
            TermAttribute   termAtt = (TermAttribute)src.AddAttribute(typeof(TermAttribute));
            TypeAttribute   typeAtt = (TypeAttribute)src.AddAttribute(typeof(TypeAttribute));

            termAtt.SetTermBuffer("TestTerm");
            typeAtt.SetType("TestType");
            int hashCode = src.GetHashCode();

            AttributeSource.State state = src.CaptureState();

            // modify the attributes
            termAtt.SetTermBuffer("AnotherTestTerm");
            typeAtt.SetType("AnotherTestType");
            Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");

            src.RestoreState(state);
            Assert.AreEqual("TestTerm", termAtt.Term());
            Assert.AreEqual("TestType", typeAtt.Type());
            Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");

            // restore into an exact configured copy
            AttributeSource copy = new AttributeSource();

            copy.AddAttribute(typeof(TermAttribute));
            copy.AddAttribute(typeof(TypeAttribute));
            copy.RestoreState(state);
            Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
            Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");

            // init a second instance (with attributes in different order and one additional attribute)
            AttributeSource src2 = new AttributeSource();

            typeAtt = (TypeAttribute)src2.AddAttribute(typeof(TypeAttribute));
            Lucene.Net.Analysis.Tokenattributes.FlagsAttribute flagsAtt = (Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)src2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute));
            termAtt = (TermAttribute)src2.AddAttribute(typeof(TermAttribute));
            flagsAtt.SetFlags(12345);

            src2.RestoreState(state);
            Assert.AreEqual("TestTerm", termAtt.Term());
            Assert.AreEqual("TestType", typeAtt.Type());
            Assert.AreEqual(12345, flagsAtt.GetFlags(), "FlagsAttribute should not be touched");

            // init a third instance missing one Attribute
            AttributeSource src3 = new AttributeSource();

            termAtt = (TermAttribute)src3.AddAttribute(typeof(TermAttribute));
            try
            {
                src3.RestoreState(state);
                Assert.Fail("The third instance is missing the TypeAttribute, so RestoreState() should throw an ArgumentException");
            }
            catch (System.ArgumentException)
            {
                // pass
            }
        }
Example #8
        /// <summary>
        ///   Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using
        ///   affix rules in the provided HunspellDictionary.
        /// </summary>
        /// <param name="input">TokenStream whose tokens will be stemmed.</param>
        /// <param name="dictionary">HunspellDictionary containing the affix rules and words that will be used to stem the tokens.</param>
        /// <param name="dedup">true if only unique terms should be output.</param>
        public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, Boolean dedup = true)
            : base(input)
        {
            _posIncAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
            _termAtt   = (TermAttribute)AddAttribute(typeof(TermAttribute));

            _dedup   = dedup;
            _stemmer = new HunspellStemmer(dictionary);
        }
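A hedged usage sketch; the HunspellDictionary constructor arguments and the tokenizer choice are assumptions, not taken from this source.

        // Hypothetical wiring: affixStream, dictionaryStream and reader are placeholders.
        var dictionary = new HunspellDictionary(affixStream, dictionaryStream);
        TokenStream chain = new WhitespaceTokenizer(reader);
        chain = new HunspellStemFilter(chain, dictionary, dedup: true);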
Example #9
        /// <summary>
        /// Get the term qualified name when using the type of <typeparamref name="T"/>
        /// </summary>
        /// <typeparam name="T">The type of the term.</typeparam>
        /// <returns>The qualified name.</returns>
        public static string GetTermQualifiedName <T>()
        {
            object[] attributes = typeof(T).GetCustomAttributes(typeof(TermAttribute), false);
            if (attributes == null || attributes.Length == 0)
            {
                return(null);
            }

            TermAttribute term = (TermAttribute)attributes[0];

            return(term.QualifiedName);
        }
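A hedged usage sketch; note that TermAttribute here is a custom reflection attribute exposing QualifiedName, not Lucene's token attribute, and the settable property shown is an assumption.

        // Hypothetical declaration and lookup.
        [Term(QualifiedName = "myschema:Person")]
        class Person { }

        string qualifiedName = GetTermQualifiedName<Person>(); // "myschema:Person"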
Example #10
        private void Init(int gramSize)
        {
            if (gramSize < 1)
            {
                throw new ArgumentException(
                          "minGram must be greater than zero");
            }
            _mGramSize = gramSize;

            _mTermAtt   = (TermAttribute)AddAttribute(typeof(TermAttribute));
            _mOffsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
        }
Example #11
        public string StemText(string text)
        {
            string      result = "";
            TokenStream stream = Stemmer.TokenStream(String.Empty, new StringReader(text));

            // the attribute instance is stable for the stream's lifetime, so fetch it once
            TermAttribute termAttr = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));

            while (stream.IncrementToken())
            {
                result = result + termAttr.Term() + " ";
            }

            return(result.Trim());
        }
Example #12
        public IntMetaDataTokenStream(string tokenText)
        {
            _tokenText = tokenText;

            // NOTE: Calling the AddAttribute<T> method failed, so
            // switched to using AddAttributeImpl.
            _termAttribute   = new TermAttribute();
            _offsetAttribute = new OffsetAttribute();
            _payloadAtt      = new PayloadAttribute();
            base.AddAttributeImpl(_termAttribute);
            base.AddAttributeImpl(_offsetAttribute);
            base.AddAttributeImpl(_payloadAtt);
        }
Example #13
        public SynonymFilter(TokenStream input, SynonymEngine engine) : base(input)
        {
            if (engine == null)
            {
                throw new ArgumentNullException("engine");
            }
            synonymStack = new Stack <string>();
            this.engine  = engine;

            this.termAtt    = (TermAttribute)AddAttribute <ITermAttribute>();
            this.posIncrAtt = (PositionIncrementAttribute)AddAttribute <IPositionIncrementAttribute>();

            //this.termAtt = this.AddAttribute<string>();
            //this.posIncrAtt = this.AddAttribute<string>();
        }
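A sketch of the matching IncrementToken(), in the spirit of the classic Lucene in Action SynonymFilter rather than taken from this source; the current State field and the AddSynonymsToStack helper are assumptions.

        public override bool IncrementToken()
        {
            if (synonymStack.Count > 0)
            {
                // emit a buffered synonym at the same position as the original token
                string syn = synonymStack.Pop();
                RestoreState(current); // hypothetical field: AttributeSource.State current
                termAtt.SetTermBuffer(syn);
                posIncrAtt.SetPositionIncrement(0);
                return true;
            }

            if (!input.IncrementToken())
            {
                return false; // no more upstream tokens
            }

            if (AddSynonymsToStack(termAtt.Term())) // hypothetical helper backed by engine
            {
                current = CaptureState(); // remember this token for the buffered synonyms
            }

            return true;
        }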
Example #14
        public static IEnumerable <string> TokensFromAnalysis(Analyzer analyzer, String text)
        {
            TokenStream   stream    = analyzer.TokenStream("contents", new StringReader(text));
            List <string> result    = new List <string>();
            TermAttribute tokenAttr = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));

            while (stream.IncrementToken())
            {
                result.Add(tokenAttr.Term());
            }

            stream.End();
            stream.Close();

            return(result);
        }
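A short usage sketch; SimpleAnalyzer lower-cases and splits on non-letters.

        foreach (string token in TokensFromAnalysis(new SimpleAnalyzer(), "Hello Lucene World"))
        {
            Console.WriteLine(token); // hello, lucene, world
        }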
Example #15
            /**
             * Creates NGramTokenFilter with given min and max n-grams.
             * <param name="input"><see cref="TokenStream"/> holding the input to be tokenized</param>
             * <param name="minGram">the smallest n-gram to generate</param>
             * <param name="maxGram">the largest n-gram to generate</param>
             */
            public NGramTokenFilter(TokenStream input, int minGram, int maxGram)
                : base(input)
            {
                if (minGram < 1)
                {
                    throw new System.ArgumentException("minGram must be greater than zero");
                }
                if (minGram > maxGram)
                {
                    throw new System.ArgumentException("minGram must not be greater than maxGram");
                }
                this.minGram = minGram;
                this.maxGram = maxGram;

                this.termAtt   = (TermAttribute)AddAttribute(typeof(TermAttribute));
                this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
            }
Example #16
        private string[] GetAnalyzedText(string field, string text)
        {
            var reader      = new StringReader(text);
            var tokenStream = _masterAnalyzer.TokenStream(field, reader);

            _termAtt = (TermAttribute)tokenStream.AddAttribute(typeof(TermAttribute));

            var tokens = new List <string>();
            var words  = new List <string>();

            while (tokenStream.IncrementToken())
            {
                tokens.Add(_termAtt.ToString());
                words.Add(_termAtt.Term());
            }
            return(words.ToArray());
        }
Example #17
        /**
         * Creates NGramTokenFilter with given min and max n-grams.
         * <param name="input"><see cref="TokenStream"/> holding the input to be tokenized</param>
         * <param name="minGram">the smallest n-gram to generate</param>
         * <param name="maxGram">the largest n-gram to generate</param>
         */
        public NGramTokenFilter(TokenStream input, int minGram, int maxGram)
            : base(input)
        {
            if (minGram < 1)
            {
                throw new System.ArgumentException("minGram must be greater than zero");
            }
            if (minGram > maxGram)
            {
                throw new System.ArgumentException("minGram must not be greater than maxGram");
            }
            _minGram = minGram;
            _maxGram = maxGram;

            _termAtt   = (TermAttribute)AddAttribute <ITermAttribute>();
            _offsetAtt = (OffsetAttribute)AddAttribute <IOffsetAttribute>();
        }
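A hedged usage sketch: 1- to 3-grams of a single token.

        TokenStream ts = new WhitespaceTokenizer(new StringReader("abc"));
        ts = new NGramTokenFilter(ts, 1, 3);
        // yields: a, b, c, ab, bc, abc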
Example #18
        public void TestUnaccentedWordAnalyzer()
        {
            TopDocs td   = null;
            string  text = "name.surname@gmail.com 123.456 ğüşıöç%ĞÜŞİÖÇ$ΑΒΓΔΕΖ#АБВГДЕ SSß";

            string[] expectedTokens = new string[] { "name", "surname", "gmail", "com", "123", "456", "gusioc", "gusioc", "αβγδεζ", "абвгде", "ssss" };

            UnaccentedWordAnalyzer analyzer = new UnaccentedWordAnalyzer();
            TokenStream            ts       = analyzer.TokenStream("", new System.IO.StringReader(text));

            int           i             = 0;
            TermAttribute termAttribute = (TermAttribute)ts.GetAttribute(typeof(TermAttribute));

            while (ts.IncrementToken())
            {
                Assert.AreEqual(expectedTokens[i++], termAttribute.Term());
                System.Diagnostics.Debug.WriteLine(termAttribute.Term());
            }

            QueryParser   p   = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "field", analyzer);
            IndexSearcher src = CreateIndex(text, analyzer);

            td = src.Search(p.Parse("ĞÜŞıöç"), 10);
            Assert.AreEqual(1, td.totalHits);

            td = src.Search(p.Parse("name"), 10);
            Assert.AreEqual(1, td.totalHits);

            td = src.Search(p.Parse("surname"), 10);
            Assert.AreEqual(1, td.totalHits);

            td = src.Search(p.Parse("NAME.surname"), 10);
            Assert.AreEqual(1, td.totalHits);

            td = src.Search(p.Parse("surname@gmail"), 10);
            Assert.AreEqual(1, td.totalHits);

            td = src.Search(p.Parse("name@gmail"), 10);
            Assert.AreEqual(0, td.totalHits);

            td = src.Search(p.Parse("456"), 10);
            Assert.AreEqual(1, td.totalHits);

            td = src.Search(p.Parse("123.456"), 10);
            Assert.AreEqual(1, td.totalHits);
        }
Example #19
        /**
         * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
         *
         * @param input {@link TokenStream} holding the input to be tokenized
         * @param side the {@link Side} from which to chop off an n-gram
         * @param minGram the smallest n-gram to generate
         * @param maxGram the largest n-gram to generate
         */
        public EdgeNGramTokenFilter(TokenStream input, Side side, int minGram, int maxGram)
            : base(input)
        {
            if (minGram < 1)
            {
                throw new System.ArgumentException("minGram must be greater than zero");
            }

            if (minGram > maxGram)
            {
                throw new System.ArgumentException("minGram must not be greater than maxGram");
            }

            this.minGram = minGram;
            this.maxGram = maxGram;
            this.side    = side;
            termAtt      = (TermAttribute)AddAttribute(typeof(TermAttribute));
            offsetAtt    = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
        }
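A hedged usage sketch; Side.FRONT is assumed to be the front-edge value of the Side enum used above.

        TokenStream ts = new KeywordTokenizer(new StringReader("lucene"));
        ts = new EdgeNGramTokenFilter(ts, Side.FRONT, 1, 4);
        // yields: l, lu, luc, luce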
Example #20
        public static IEnumerable <string> TokensFromAnalysis(Analyzer analyzer, String text)
        {
            TokenStream   stream    = analyzer.TokenStream("contents", new StringReader(text));
            List <string> result    = new List <string>();
            TermAttribute tokenAttr = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));

            while (stream.IncrementToken())
            {
                Console.WriteLine("Buffer:={0}, Length:={1}, Term:={2}".FormatWith(tokenAttr.TermBuffer(), tokenAttr.TermLength(), tokenAttr.Term()));
                result.Add(tokenAttr.Term());
            }

            stream.End();
            stream.Close();

            return(result);
        }
Example #21
        static List <string> TokenizeStandard(string content, TokenizeConfig config)
        {
            StringReader reader = new StringReader(content);
            TokenStream  result = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_24, reader);

            var stophash = StopFilter.MakeStopSet(config.StopWords);

            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            result = new StopFilter(true, result, stophash, true);

            // Set up lexicon/invertlexicon, featurevectors, wordappearancecount
            result.Reset();
            TermAttribute termattr = (TermAttribute)result.GetAttribute(typeof(TermAttribute));
            List <string> words    = new List <string>();

            while (result.IncrementToken())
            {
                words.Add(termattr.Term());
            }
            return(words);
        }
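A hedged usage sketch for TokenizeStandard; the shape of TokenizeConfig (a settable StopWords list) is an assumption for illustration.

        // Hypothetical config shape; only StopWords is used above.
        var config = new TokenizeConfig { StopWords = new List<string> { "the", "a" } };
        List<string> words = TokenizeStandard("The quick brown fox", config);
        // words: quick, brown, fox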
Example #22
        /// <summary> Simple similarity query generator.
        /// Takes every unique word and forms a boolean query where all words are optional.
        /// After you get this you'll use it to query your {@link IndexSearcher} for similar docs.
        /// The only caveat is that the first hit returned <b>should be</b> your source document - you'll
        /// need to ignore that.
        ///
        /// <p>
        /// So, if you have a code fragment like this:
        /// <br>
        /// <code>
        /// Query q = FormSimilarQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
        /// </code>
        ///
        /// <p>
        /// The query returned, in string form, will be <code>'(i use lucene to search fast searchers are good')</code>.
        ///
        /// <p>
        /// The philosophy behind this method is "two documents are similar if they share lots of words".
        /// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher
        /// similarity score if they share more uncommon words.
        ///
        /// <p>
        /// This method is fail-safe in that if a long 'body' is passed in and
        /// {@link BooleanQuery#add BooleanQuery.add()} (used internally)
        /// throws
        /// {@link org.apache.lucene.search.BooleanQuery.TooManyClauses BooleanQuery.TooManyClauses}, the
        /// query as it is will be returned.
        /// </summary>
        /// <param name="body">the body of the document you want to find similar documents to
        /// </param>
        /// <param name="a">the analyzer to use to parse the body
        /// </param>
        /// <param name="field">the field you want to search on, probably something like "contents" or "body"
        /// </param>
        /// <param name="stop">optional set of stop words to ignore
        /// </param>
        /// <returns> a query with all unique words in 'body'
        /// </returns>
        /// <throws>  IOException this can't happen... </throws>
        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
        {
            TokenStream   ts      = a.TokenStream(field, new System.IO.StringReader(body));
            TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));

            BooleanQuery tmp = new BooleanQuery();

            System.Collections.Hashtable already = new System.Collections.Hashtable(); // ignore dups
            while (ts.IncrementToken())
            {
                String word = termAtt.Term();
                // ignore opt stop words
                if (stop != null && stop.Contains(word))
                {
                    continue;
                }
                // ignore dups
                if (already.Contains(word) == true)
                {
                    continue;
                }
                already.Add(word, word);
                // add to query
                TermQuery tq = new TermQuery(new Term(field, word));
                try
                {
                    tmp.Add(tq, BooleanClause.Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // fail-safe, just return what we have, not the end of the world
                    break;
                }
            }
            return(tmp);
        }
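A usage sketch echoing the doc comment above; the analyzer and version are assumptions.

            // Builds a boolean OR query over the unique words of the body.
            Query q = FormSimilarQuery(
                "I use Lucene to search fast. Fast searchers are good",
                new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29),
                "contents",
                null);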
Example #23
        private static void  ConsumeStreamNewAPI(TokenStream stream)
        {
            stream.Reset();
            PayloadAttribute payloadAtt = (PayloadAttribute)stream.AddAttribute(typeof(PayloadAttribute));
            TermAttribute    termAtt    = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));

            int i = 0;

            while (stream.IncrementToken())
            {
                System.String term = termAtt.Term();
                Payload       p    = payloadAtt.GetPayload();
                if (p != null && p.GetData().Length == 1 && p.GetData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION)
                {
                    Assert.IsTrue("tokenstream".Equals(term), "only TokenStream is a proper noun");
                }
                else
                {
                    Assert.IsFalse("tokenstream".Equals(term), "all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)");
                }
                Assert.AreEqual(results[i], term);
                i++;
            }
        }
Example #24
 public void Init()
 {
     instance = new TermAttribute();
 }
Example #25
 private void Init()
 {
     termAtt = AddAttribute <TermAttribute>();
 }
Example #26
 public static void Append(this TermAttribute termAtt, string str)
 {
     termAtt.SetTermBuffer(termAtt.Term() + str);             // TODO: Not optimal, but works
 }
Example #27
 public CollationKeyFilter(TokenStream input, CultureInfo cultureInfo) : base(input)
 {
     _cultureInfo = cultureInfo;
     _termAtt     = (TermAttribute)AddAttribute <ITermAttribute>();
 }
Example #28
 public SynonymFilter(TokenStream input) : base(input)
 {
     _termAtt    = (TermAttribute)AddAttribute <ITermAttribute>();
     _posIncrAtt = (PositionIncrementAttribute)AddAttribute <IPositionIncrementAttribute>();
 }
Example #29
        private void AddTerms(IndexReader reader, FieldVals f)
        {
            if (f.queryString == null)
            {
                return;
            }
            TokenStream   ts      = analyzer.TokenStream(f.fieldName, new System.IO.StringReader(f.queryString));
            TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));

            int       corpusNumDocs            = reader.NumDocs();
            Term      internSavingTemplateTerm = new Term(f.fieldName); //optimization to avoid constructing new Term() objects
            Hashtable processedTerms           = new Hashtable();

            while (ts.IncrementToken())
            {
                String term = termAtt.Term();
                if (!processedTerms.Contains(term))
                {
                    processedTerms.Add(term, term);
                    ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
                    float          minScore  = 0;
                    Term           startTerm = internSavingTemplateTerm.CreateTerm(term);
                    FuzzyTermEnum  fe        = new FuzzyTermEnum(reader, startTerm, f.minSimilarity, f.prefixLength);
                    TermEnum       origEnum  = reader.Terms(startTerm);
                    int            df        = 0;
                    if (startTerm.Equals(origEnum.Term()))
                    {
                        df = origEnum.DocFreq(); //store the df so all variants use same idf
                    }
                    int numVariants          = 0;
                    int totalVariantDocFreqs = 0;
                    do
                    {
                        Term possibleMatch = fe.Term();
                        if (possibleMatch != null)
                        {
                            numVariants++;
                            totalVariantDocFreqs += fe.DocFreq();
                            float score = fe.Difference();
                            if (variantsQ.Size() < MAX_VARIANTS_PER_TERM || score > minScore)
                            {
                                ScoreTerm st = new ScoreTerm(possibleMatch, score, startTerm);
                                variantsQ.Insert(st);
                                minScore = ((ScoreTerm)variantsQ.Top()).score; // maintain minScore
                            }
                        }
                    } while (fe.Next());
                    if (numVariants > 0)
                    {
                        int avgDf = totalVariantDocFreqs / numVariants;
                        if (df == 0)    //no direct match we can use as df for all variants
                        {
                            df = avgDf; //use avg df of all variants
                        }

                        // take the top variants (scored by edit distance) and reset the score
                        // to include an IDF factor then add to the global queue for ranking
                        // overall top query terms
                        int size = variantsQ.Size();
                        for (int i = 0; i < size; i++)
                        {
                            ScoreTerm st = (ScoreTerm)variantsQ.Pop();
                            st.score = (st.score * st.score) * sim.Idf(df, corpusNumDocs);
                            q.Insert(st);
                        }
                    }
                }
            }
        }
Example #30
 public static void Append(this TermAttribute termAtt, char ch)
 {
     termAtt.SetTermBuffer(termAtt.Term() + new string(new[] { ch }));             // TODO: Not optimal, but works
 }
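A combined usage sketch for the two Append extensions above.

 termAtt.SetTermBuffer("runn");
 termAtt.Append('i');  // "runni"
 termAtt.Append("ng"); // "running"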