Example #1
0
        /// <summary>
        /// Seeks to randomly generated terms and, whenever the generated term is not
        /// present in <paramref name="fieldTermsArray"/>, verifies the
        /// <see cref="TermsEnum.SeekStatus"/> returned by <c>SeekCeil</c> and the terms
        /// that follow the seek position.
        /// </summary>
        /// <param name="r">Source of randomness for field choice, term text and step count.</param>
        /// <param name="numField">Exclusive upper bound for the random field number ("f0", "f1", ...).</param>
        /// <param name="fieldTerms">Expected terms; indexed with positions obtained from the binary search.</param>
        /// <param name="fieldTermsArray">Array that is binary-searched for the random term.</param>
        /// <param name="reader">Reader whose terms are enumerated.</param>
        private void DoTestSeekDoesNotExist(Random r, int numField, IList<Term> fieldTerms, Term[] fieldTermsArray, IndexReader reader)
        {
            // One TermsEnum per field, created on first use and then reused.
            IDictionary<string, TermsEnum> enumsByField = new Dictionary<string, TermsEnum>();

            if (Verbose)
            {
                Console.WriteLine("TEST: top random seeks");
            }

            int rounds = AtLeast(100);
            for (int round = 0; round < rounds; round++)
            {
                // Build a random probe term in a random field.
                string field = ("f" + r.Next(numField)).Intern();
                Term   probe = new Term(field, GetRandomString(r));

                int searchResult = Array.BinarySearch(fieldTermsArray, probe);
                if (searchResult >= 0)
                {
                    // The probe was found in the array; this method only checks misses.
                    continue;
                }

                if (Verbose)
                {
                    Console.WriteLine("TEST: non-exist seek to " + field + ":" + UnicodeUtil.ToHexString(probe.Text));
                }

                if (!enumsByField.TryGetValue(field, out TermsEnum termsEnum))
                {
                    termsEnum           = MultiFields.GetTerms(reader, field).GetEnumerator();
                    enumsByField[field] = termsEnum;
                }

                if (Verbose)
                {
                    Console.WriteLine("  got enum");
                }

                // A negative binary-search result encodes the insertion point.
                int ceiling = -searchResult - 1;

                bool ceilingInSameField = ceiling != fieldTerms.Count &&
                                          fieldTerms[ceiling].Field.Equals(field, StringComparison.Ordinal);
                if (!ceilingInSameField)
                {
                    // No expected term at or after the probe within this field.
                    Assert.AreEqual(TermsEnum.SeekStatus.END, termsEnum.SeekCeil(probe.Bytes));
                    continue;
                }

                Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, termsEnum.SeekCeil(probe.Bytes));

                if (Verbose)
                {
                    Console.WriteLine("  got term=" + UnicodeUtil.ToHexString(termsEnum.Term.Utf8ToString()));
                    Console.WriteLine("  exp term=" + UnicodeUtil.ToHexString(fieldTerms[ceiling].Text));
                }

                Assert.AreEqual(fieldTerms[ceiling].Bytes, termsEnum.Term);

                // Step forward a random number of times, comparing against the expected list.
                int steps = TestUtil.NextInt32(r, 5, 100);
                for (int step = 0; step < steps; step++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: now next()");
                    }

                    int nextIndex = 1 + ceiling + step;
                    if (nextIndex >= fieldTerms.Count)
                    {
                        break;
                    }

                    Term expected = fieldTerms[nextIndex];
                    if (!expected.Field.Equals(field, StringComparison.Ordinal))
                    {
                        // The expected list has moved on to another field, so the enum must be exhausted.
                        Assert.IsFalse(termsEnum.MoveNext());
                        break;
                    }

                    Assert.IsTrue(termsEnum.MoveNext());
                    BytesRef actual = termsEnum.Term;

                    if (Verbose)
                    {
                        Console.WriteLine("  got term=" + (actual is null ? null : UnicodeUtil.ToHexString(actual.Utf8ToString())));
                        Console.WriteLine("       exp=" + UnicodeUtil.ToHexString(expected.Text.ToString()));
                    }

                    Assert.AreEqual(expected.Bytes, actual);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Indexes 10050 documents, each holding a distinct five-digit zero-padded
        /// number in "field", using the pulsing postings format, then walks the terms
        /// in order and checks that every term matches exactly one document.
        /// </summary>
        public virtual void Test10kPulsed()
        {
            const int    numDocs       = 10050;
            const string numberPattern = "00000"; // LUCENENET specific: ToString formatting replaces the Java NumberFormat

            // This test always runs with the pulsing codec.
            Codec pulsingCodec = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));

            DirectoryInfo        indexPath = CreateTempDir("10kpulsed");
            BaseDirectoryWrapper dir       = NewFSDirectory(indexPath);

            // The index is checked explicitly at the end of the test instead.
            dir.CheckIndexOnDispose = false;
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(pulsingCodec));

            Document  document  = new Document();
            FieldType fieldType = new FieldType(TextField.TYPE_STORED);

            // Pick one of three index option levels at random.
            int optionsPick = TestUtil.NextInt32(Random, 0, 2);
            if (optionsPick == 0)
            {
                fieldType.IndexOptions = IndexOptions.DOCS_ONLY;
            }
            else if (optionsPick == 1)
            {
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS;
            }
            else
            {
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }

            Field field = NewField("field", "", fieldType);
            document.Add(field);

            // The same Document/Field pair is reused; only the value changes per document.
            for (int docNum = 0; docNum < numDocs; docNum++)
            {
                field.SetStringValue(docNum.ToString(numberPattern, CultureInfo.InvariantCulture));
                writer.AddDocument(document);
            }

            IndexReader reader = writer.GetReader();
            writer.Dispose();

            TermsEnum termsEnum = MultiFields.GetTerms(reader, "field").GetEnumerator();
            DocsEnum  docsEnum  = null;

            for (int termNum = 0; termNum < numDocs; termNum++)
            {
                string expected = termNum.ToString(numberPattern, CultureInfo.InvariantCulture);
                termsEnum.MoveNext();
                assertEquals(expected, termsEnum.Term.Utf8ToString());

                // Each term must have one document and then be exhausted.
                docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                assertTrue(docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
            }
            reader.Dispose();

            TestUtil.CheckIndex(dir);
            dir.Dispose();
        }
Example #3
0
        /// <summary>
        /// Picks random entries from <paramref name="fieldTerms"/>, seeks to each one
        /// (expecting <see cref="TermsEnum.SeekStatus.FOUND"/>), and then steps forward,
        /// comparing the enumerated terms with the entries that follow in the list.
        /// </summary>
        /// <param name="r">Source of randomness for the start position and step count.</param>
        /// <param name="fieldTerms">Expected terms; a random element is the seek target.</param>
        /// <param name="reader">Reader whose terms are enumerated.</param>
        private void DoTestSeekExists(Random r, IList<Term> fieldTerms, IndexReader reader)
        {
            // One TermsEnum per field, created on first use and then reused.
            IDictionary<string, TermsEnum> enumsByField = new Dictionary<string, TermsEnum>();

            if (Verbose)
            {
                Console.WriteLine("\nTEST: top now seek");
            }

            int rounds = AtLeast(100);

            for (int round = 0; round < rounds; round++)
            {
                // Choose a random field+term from the expected list.
                int    start  = r.Next(fieldTerms.Count);
                Term   target = fieldTerms[start];
                string field  = target.Field;

                if (Verbose)
                {
                    Console.WriteLine("TEST: exist seek field=" + field + " term=" + UnicodeUtil.ToHexString(target.Text));
                }

                if (!enumsByField.TryGetValue(field, out TermsEnum termsEnum))
                {
                    termsEnum           = MultiFields.GetTerms(reader, field).GetEnumerator();
                    enumsByField[field] = termsEnum;
                }

                if (Verbose)
                {
                    Console.WriteLine("  done get enum");
                }

                // The target comes from the expected list, so the seek must hit it exactly.
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(target.Bytes));

                // Step forward a random number of times, comparing against the expected list.
                int steps = TestUtil.NextInt32(r, 5, 100);
                for (int step = 0; step < steps; step++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: now next()");
                    }

                    int nextIndex = 1 + start + step;
                    if (nextIndex >= fieldTerms.Count)
                    {
                        break;
                    }

                    Term expected = fieldTerms[nextIndex];
                    if (!expected.Field.Equals(field, StringComparison.Ordinal))
                    {
                        // The expected list has moved on to another field, so the enum must be exhausted.
                        Assert.IsFalse(termsEnum.MoveNext());
                        break;
                    }

                    Assert.IsTrue(termsEnum.MoveNext());
                    BytesRef actual = termsEnum.Term;

                    if (Verbose)
                    {
                        Console.WriteLine("  got term=" + (actual is null ? null : UnicodeUtil.ToHexString(actual.Utf8ToString())));
                        Console.WriteLine("       exp=" + UnicodeUtil.ToHexString(expected.Text.ToString()));
                    }

                    Assert.AreEqual(expected.Bytes, actual);
                }
            }
        }
        /// <summary>
        /// Analyzes the query string of <paramref name="f"/> and, for each distinct
        /// token, enumerates fuzzy variants from the index, keeps the best-scoring ones
        /// (at most <c>MAX_VARIANTS_PER_TERM</c>), rescales their scores with an IDF
        /// factor and inserts them into the shared queue <c>q</c>.
        /// </summary>
        /// <param name="reader">Reader supplying the terms, document frequencies and document count.</param>
        /// <param name="f">Field name, query string, minimum similarity and prefix length to use.</param>
        private void AddTerms(IndexReader reader, FieldVals f)
        {
            if (f.queryString == null)
            {
                return;
            }

            Terms terms = MultiFields.GetTerms(reader, f.fieldName);
            if (terms == null)
            {
                // Nothing to match against for this field.
                return;
            }

            TokenStream ts = analyzer.GetTokenStream(f.fieldName, f.queryString);
            try
            {
                ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();

                int          corpusNumDocs = reader.NumDocs;
                ISet<string> seenTokens    = new JCG.HashSet<string>();
                ts.Reset();
                while (ts.IncrementToken())
                {
                    string token = termAtt.ToString();
                    if (!seenTokens.Add(token))
                    {
                        // Each distinct token is expanded only once.
                        continue;
                    }

                    // Bounded queue holding the best variants for this token.
                    ScoreTermQueue  variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM);
                    float           minScore  = 0;
                    Term            startTerm = new Term(f.fieldName, token);
                    AttributeSource atts      = new AttributeSource();
                    IMaxNonCompetitiveBoostAttribute maxBoostAtt = atts.AddAttribute<IMaxNonCompetitiveBoostAttribute>();
#pragma warning disable 612, 618
                    SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
#pragma warning restore 612, 618
                    // Capture the document frequency once so every variant shares the same IDF.
                    int df                   = reader.DocFreq(startTerm);
                    int numVariants          = 0;
                    int totalVariantDocFreqs = 0;
                    IBoostAttribute boostAtt = fe.Attributes.AddAttribute<IBoostAttribute>();

                    for (BytesRef candidate = fe.Next(); candidate != null; candidate = fe.Next())
                    {
                        numVariants++;
                        totalVariantDocFreqs += fe.DocFreq;
                        float score = boostAtt.Boost;
                        if (variantsQ.Count < MAX_VARIANTS_PER_TERM || score > minScore)
                        {
                            ScoreTerm st = new ScoreTerm(new Term(startTerm.Field, BytesRef.DeepCopyOf(candidate)), score, startTerm);
                            variantsQ.InsertWithOverflow(st);
                            minScore = variantsQ.Top.Score; // keep minScore in step with the queue
                        }

                        // Publish the competitive threshold only once the queue is full.
                        if (variantsQ.Count >= MAX_VARIANTS_PER_TERM)
                        {
                            maxBoostAtt.MaxNonCompetitiveBoost = minScore;
                        }
                        else
                        {
                            maxBoostAtt.MaxNonCompetitiveBoost = float.NegativeInfinity;
                        }
                    }

                    if (numVariants == 0)
                    {
                        continue;
                    }

                    if (df == 0)
                    {
                        // No direct match for the token itself: use the average df of its variants.
                        df = totalVariantDocFreqs / numVariants;
                    }

                    // Drain the per-token queue (scored by edit distance), fold in the IDF
                    // factor and hand each variant to the global queue for overall ranking.
                    for (int remaining = variantsQ.Count; remaining > 0; remaining--)
                    {
                        ScoreTerm st = variantsQ.Pop();
                        st.Score = (st.Score * st.Score) * sim.Idf(df, corpusNumDocs);
                        q.InsertWithOverflow(st);
                    }
                }
                ts.End();
            }
            finally
            {
                IOUtils.DisposeWhileHandlingException(ts);
            }
        }
Example #5
0
        /// <summary>
        /// Builds the suggest index by writing every surface form from
        /// <paramref name="enumerator"/> into a temporary on-disk index and then
        /// copying that index's "body" terms into an FST. Note that the weights for
        /// the suggestions are ignored. The temporary index directory is deleted
        /// before this method returns.
        /// </summary>
        /// <param name="enumerator">Supplies the suggestions; must not report payloads or contexts.</param>
        /// <param name="ramBufferSizeMB">RAM buffer size, in MB, passed to the temporary index writer.</param>
        /// <exception cref="ArgumentNullException"><paramref name="enumerator"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">
        /// The enumerator has payloads or contexts, no suggestion produced any term,
        /// or a term contains more grams than the configured maximum.
        /// </exception>
        public virtual void Build(IInputEnumerator enumerator, double ramBufferSizeMB)
        {
            // LUCENENET: Added guard clause for null
            if (enumerator is null)
            {
                throw new ArgumentNullException(nameof(enumerator));
            }

            if (enumerator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();

            // LUCENENET specific - using GetRandomFileName() instead of picking a random int.
            // Keep generating names until the directory is confirmed to exist on disk.
            DirectoryInfo tempIndexPath;
            do
            {
                string leafName = prefix + ".index." + Path.GetFileNameWithoutExtension(Path.GetRandomFileName());
                tempIndexPath = new DirectoryInfo(Path.Combine(directory.FullName, leafName));
                tempIndexPath.Create();
            }
            while (!System.IO.Directory.Exists(tempIndexPath.FullName));

            Directory dir = FSDirectory.Open(tempIndexPath);

            try
            {
#pragma warning disable 612, 618
                IndexWriterConfig config = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, indexAnalyzer);
#pragma warning restore 612, 618
                config.SetOpenMode(OpenMode.CREATE);
                config.SetRAMBufferSizeMB(ramBufferSizeMB);
                IndexWriter writer = new IndexWriter(dir, config);

                var fieldType = new FieldType(TextField.TYPE_NOT_STORED);
                // TODO: if only we had IndexOptions.TERMS_ONLY...
                fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS;
                fieldType.OmitNorms    = true;
                fieldType.Freeze();

                // A single Document/Field pair is reused for every suggestion.
                Document document  = new Document();
                Field    bodyField = new Field("body", "", fieldType);
                document.Add(bodyField);

                totTokens = 0;
                IndexReader reader = null;

                bool success = false;
                count = 0;
                try
                {
                    while (enumerator.MoveNext())
                    {
                        BytesRef surfaceForm = enumerator.Current;
                        bodyField.SetStringValue(surfaceForm.Utf8ToString());
                        writer.AddDocument(document);
                        count++;
                    }

                    reader = DirectoryReader.Open(writer, false);

                    Terms terms = MultiFields.GetTerms(reader, "body");
                    if (terms is null)
                    {
                        throw new ArgumentException("need at least one suggestion");
                    }

                    // Move all ngrams into an FST:
                    TermsEnum termsEnum = terms.GetEnumerator(null);

                    Outputs<long?> outputs = PositiveInt32Outputs.Singleton;
                    Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);

                    Int32sRef scratchInts = new Int32sRef();
                    while (termsEnum.MoveNext())
                    {
                        BytesRef term       = termsEnum.Term;
                        int      ngramCount = CountGrams(term);
                        if (ngramCount > grams)
                        {
                            throw new ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                        }

                        // Only unigrams contribute to the overall token total.
                        if (ngramCount == 1)
                        {
                            totTokens += termsEnum.TotalTermFreq;
                        }

                        builder.Add(Lucene.Net.Util.Fst.Util.ToInt32sRef(term, scratchInts), EncodeWeight(termsEnum.TotalTermFreq));
                    }

                    fst = builder.Finish();
                    if (fst is null)
                    {
                        throw new ArgumentException("need at least one suggestion");
                    }

                    success = true;
                }
                finally
                {
                    // On failure, dispose without letting a dispose error replace the original exception.
                    if (!success)
                    {
                        IOUtils.DisposeWhileHandlingException(writer, reader);
                    }
                    else
                    {
                        IOUtils.Dispose(writer, reader);
                    }
                }
            }
            finally
            {
                try
                {
                    IOUtils.Dispose(dir);
                }
                finally
                {
                    // LUCENENET specific - the whole directory is removed, so there is no
                    // separate loop to delete individual files first;
                    // System.IO.Directory.Delete() handles that. The Directory instance
                    // must be disposed before its files are deleted from disk.
                    try
                    {
                        System.IO.Directory.Delete(tempIndexPath.FullName, true);
                    }
                    catch (Exception e)
                    {
                        throw IllegalStateException.Create("failed to remove " + tempIndexPath, e);
                    }
                }
            }
        }
Example #6
0
        /// <summary>
        /// Runs random queries against a randomly sized set of nodes until
        /// <c>endTimeNanos</c> is reached. Each iteration acquires a shard searcher on
        /// a random node (either fresh, or pinned to the versions of an earlier
        /// search for a "follow-on" query), builds a <see cref="MultiReader"/> over
        /// the matching per-node readers, and passes both searchers to
        /// <c>AssertSame</c> for comparison. <see cref="SearcherExpiredException"/> is
        /// treated as an expected outcome at every stage.
        /// </summary>
        public virtual void TestSimple()
        {
            int numNodes = TestUtil.NextInt32(Random, 1, 10);

            double runTimeSec = AtLeast(3);

            int minDocsToMakeTerms = TestUtil.NextInt32(Random, 5, 20);

            int maxSearcherAgeSeconds = TestUtil.NextInt32(Random, 1, 3);

            if (Verbose)
            {
                Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
            }

            Start(numNodes, runTimeSec, maxSearcherAgeSeconds);

            // Earlier searches that may be replayed as follow-on queries.
            JCG.List<PreviousSearchState> priorSearches = new JCG.List<PreviousSearchState>();
            // Terms harvested from the "body" field; stays null until enough docs exist.
            IList<BytesRef> terms = null;

            while (J2N.Time.NanoTime() < endTimeNanos)
            {
                bool doFollowon = priorSearches.Count > 0 && Random.Next(7) == 1;

                // Pick a random node; we will run the query on this node:
                int myNodeID = Random.Next(numNodes);

                NodeState.ShardIndexSearcher localShardSearcher;

                PreviousSearchState prevSearchState;

                if (doFollowon)
                {
                    // Pretend user issued a followon query:
                    prevSearchState = priorSearches[Random.Next(priorSearches.Count)];

                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: follow-on query age=" + ((J2N.Time.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0));
                    }

                    try
                    {
                        localShardSearcher = m_nodes[myNodeID].Acquire(prevSearchState.Versions);
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected, sometimes; in a "real" app we would
                        // either forward this error to the user ("too
                        // much time has passed; please re-run your
                        // search") or sneakily just switch to newest
                        // searcher w/o telling them...
                        if (Verbose)
                        {
                            Console.WriteLine("  searcher expired during local shard searcher init: " + see);
                        }
                        priorSearches.Remove(prevSearchState);
                        continue;
                    }
                }
                else
                {
                    if (Verbose)
                    {
                        Console.WriteLine("\nTEST: fresh query");
                    }
                    // Do fresh query:
                    localShardSearcher = m_nodes[myNodeID].Acquire();
                    prevSearchState    = null;
                }

                // One reader per node; non-null entries are DecRef'd in the finally block below.
                IndexReader[] subs = new IndexReader[numNodes];

                PreviousSearchState searchState = null;

                try
                {
                    // Mock: now make a single reader (MultiReader) from all node
                    // searchers.  In a real shard env you can't do this... we
                    // do it to confirm results from the shard searcher
                    // are correct:
                    int docCount = 0;
                    try
                    {
                        for (int nodeID = 0; nodeID < numNodes; nodeID++)
                        {
                            long          subVersion = localShardSearcher.GetNodeVersions()[nodeID];
                            IndexSearcher sub        = m_nodes[nodeID].Searchers.Acquire(subVersion);
                            if (sub == null)
                            {
                                // Remember which node had no searcher for this version: the
                                // unwinding loop below drives nodeID down to -1, which would
                                // otherwise be what the exception message reports.
                                int expiredNodeID = nodeID;

                                // Release the readers already acquired for the preceding nodes.
                                nodeID--;
                                while (nodeID >= 0)
                                {
                                    subs[nodeID].DecRef();
                                    subs[nodeID] = null;
                                    nodeID--;
                                }
                                throw new SearcherExpiredException("nodeID=" + expiredNodeID + " version=" + subVersion);
                            }
                            subs[nodeID] = sub.IndexReader;
                            docCount    += subs[nodeID].MaxDoc;
                        }
                    }
                    catch (SearcherExpiredException see)
                    {
                        // Expected
                        if (Verbose)
                        {
                            Console.WriteLine("  searcher expired during mock reader init: " + see);
                        }
                        continue;
                    }

                    IndexReader   mockReader   = new MultiReader(subs);
                    IndexSearcher mockSearcher = new IndexSearcher(mockReader);

                    Query query;
                    Sort  sort;

                    if (prevSearchState != null)
                    {
                        // Follow-on: replay the earlier query and sort.
                        query = prevSearchState.Query;
                        sort  = prevSearchState.Sort;
                    }
                    else
                    {
                        if (terms == null && docCount > minDocsToMakeTerms)
                        {
                            // TODO: try to "focus" on high freq terms sometimes too
                            // TODO: maybe also periodically reset the terms...?
                            TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").GetEnumerator();
                            terms = new JCG.List<BytesRef>();
                            while (termsEnum.MoveNext())
                            {
                                terms.Add(BytesRef.DeepCopyOf(termsEnum.Term));
                            }
                            if (Verbose)
                            {
                                Console.WriteLine("TEST: init terms: " + terms.Count + " terms");
                            }
                            if (terms.Count == 0)
                            {
                                // Nothing harvested; try again on a later iteration.
                                terms = null;
                            }
                        }

                        if (Verbose)
                        {
                            Console.WriteLine("  maxDoc=" + mockReader.MaxDoc);
                        }

                        if (terms != null)
                        {
                            // Random choice between an exact term query and a prefix query.
                            if (Random.NextBoolean())
                            {
                                query = new TermQuery(new Term("body", terms[Random.Next(terms.Count)]));
                            }
                            else
                            {
                                string t = terms[Random.Next(terms.Count)].Utf8ToString();
                                string prefix;
                                if (t.Length <= 1)
                                {
                                    prefix = t;
                                }
                                else
                                {
                                    prefix = t.Substring(0, TestUtil.NextInt32(Random, 1, 2));
                                }
                                query = new PrefixQuery(new Term("body", prefix));
                            }

                            if (Random.NextBoolean())
                            {
                                sort = null;
                            }
                            else
                            {
                                // TODO: sort by more than 1 field
                                // NOTE(review): Random.Next(3) presumably yields only 0..2, which
                                // would make the final "title" branch below unreachable - confirm
                                // whether that is intended before changing the bound.
                                int what = Random.Next(3);
                                if (what == 0)
                                {
                                    sort = new Sort(SortField.FIELD_SCORE);
                                }
                                else if (what == 1)
                                {
                                    // TODO: this sort doesn't merge
                                    // correctly... it's tricky because you
                                    // could have > 2.1B docs across all shards:
                                    //sort = new Sort(SortField.FIELD_DOC);
                                    sort = null;
                                }
                                else if (what == 2)
                                {
                                    sort = new Sort(new SortField[] { new SortField("docid", SortFieldType.INT32, Random.NextBoolean()) });
                                }
                                else
                                {
                                    sort = new Sort(new SortField[] { new SortField("title", SortFieldType.STRING, Random.NextBoolean()) });
                                }
                            }
                        }
                        else
                        {
                            // No terms available yet, so there is nothing to query this round.
                            query = null;
                            sort  = null;
                        }
                    }

                    if (query != null)
                    {
                        try
                        {
                            searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState);
                        }
                        catch (SearcherExpiredException see)
                        {
                            // Expected; in a "real" app we would
                            // either forward this error to the user ("too
                            // much time has passed; please re-run your
                            // search") or sneakily just switch to newest
                            // searcher w/o telling them...
                            if (Verbose)
                            {
                                Console.WriteLine("  searcher expired during search: " + see);
                                Console.Out.Write(see.StackTrace);
                            }
                            // We can't do this in general: on a very slow
                            // computer it's possible the local searcher
                            // expires before we can finish our search:
                            // assert prevSearchState != null;
                            if (prevSearchState != null)
                            {
                                priorSearches.Remove(prevSearchState);
                            }
                        }
                    }
                }
                finally
                {
                    //m_nodes[myNodeID].Release(localShardSearcher);
                    NodeState.Release(localShardSearcher); // LUCENENET: Made Release() static per CA1822 for performance
                    foreach (IndexReader sub in subs)
                    {
                        if (sub != null)
                        {
                            sub.DecRef();
                        }
                    }
                }

                // Occasionally keep this search so it can be replayed as a follow-on query;
                // once more than 200 are stored, shuffle and keep only the first 100.
                if (searchState != null && searchState.SearchAfterLocal != null && Random.Next(5) == 3)
                {
                    priorSearches.Add(searchState);
                    if (priorSearches.Count > 200)
                    {
                        priorSearches.Shuffle(Random);
                        priorSearches.RemoveRange(100, priorSearches.Count - 100); // LUCENENET: Converted end index to length
                    }
                }
            }

            Finish();
        }
Example #7
0
        /// <summary>
        /// Build the suggest index, using up to the specified
        ///  amount of temporary RAM while building.  Note that
        ///  the weights for the suggestions are ignored.
        /// </summary>
        /// <param name="iterator"> Source of the surface forms to index; it must not
        ///  report payloads or contexts. </param>
        /// <param name="ramBufferSizeMB"> Value assigned to
        ///  <c>IndexWriterConfig.RAMBufferSizeMB</c> for the temporary index. </param>
        /// <exception cref="System.ArgumentException"> If <paramref name="iterator"/> has
        ///  payloads or contexts, if no terms were indexed, or if a term contains more
        ///  grams than the configured maximum. </exception>
        /// <exception cref="System.InvalidOperationException"> If a temporary index file,
        ///  or the temporary index directory itself, cannot be removed. </exception>
        public virtual void Build(InputIterator iterator, double ramBufferSizeMB)
        {
            if (iterator.HasPayloads())
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts())
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            string prefix    = this.GetType().Name;
            var    directory = OfflineSorter.DefaultTempDir();
            // TODO: messy ... java7 has Files.createTempDirectory
            // ... but 4.x is java6:
            File   tempIndexPath = null;
            Random random        = new Random();

            // Keep trying random names until a fresh directory is actually created.
            while (true)
            {
                tempIndexPath = new File(directory, prefix + ".index." + random.Next(int.MaxValue));
                if (tempIndexPath.mkdir())
                {
                    break;
                }
            }

            Directory dir = FSDirectory.Open(tempIndexPath);

            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, indexAnalyzer);

            iwc.OpenMode        = IndexWriterConfig.OpenMode.CREATE;
            iwc.RAMBufferSizeMB = ramBufferSizeMB;
            IndexWriter writer = new IndexWriter(dir, iwc);

            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);

            // TODO: if only we had IndexOptions.TERMS_ONLY...
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
            ft.OmitNorms    = true;
            ft.Freeze();

            // A single document/field pair is reused for every surface form;
            // only the field's string value changes between AddDocument calls.
            Document doc   = new Document();
            Field    field = new Field("body", "", ft);

            doc.Add(field);

            totTokens = 0;
            IndexReader reader = null;

            bool success = false;

            count = 0;
            try
            {
                // Index every surface form, counting how many were added.
                while (true)
                {
                    BytesRef surfaceForm = iterator.Next();
                    if (surfaceForm == null)
                    {
                        break;
                    }
                    field.StringValue = surfaceForm.Utf8ToString();
                    writer.AddDocument(doc);
                    count++;
                }
                reader = DirectoryReader.Open(writer, false);

                Terms terms = MultiFields.GetTerms(reader, "body");
                if (terms == null)
                {
                    throw new System.ArgumentException("need at least one suggestion");
                }

                // Move all ngrams into an FST:
                TermsEnum termsEnum = terms.Iterator(null);

                Outputs<long?> outputs = PositiveIntOutputs.Singleton;
                Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);

                IntsRef scratchInts = new IntsRef();
                while (true)
                {
                    BytesRef term = termsEnum.next();
                    if (term == null)
                    {
                        break;
                    }
                    int ngramCount = countGrams(term);
                    if (ngramCount > grams)
                    {
                        throw new System.ArgumentException("tokens must not contain separator byte; got token=" + term + " but gramCount=" + ngramCount + ", which is greater than expected max ngram size=" + grams);
                    }
                    // Only single-gram terms contribute to the running token total.
                    if (ngramCount == 1)
                    {
                        totTokens += termsEnum.TotalTermFreq();
                    }

                    builder.Add(Util.ToIntsRef(term, scratchInts), encodeWeight(termsEnum.TotalTermFreq()));
                }

                fst = builder.Finish();
                if (fst == null)
                {
                    throw new System.ArgumentException("need at least one suggestion");
                }
                //System.out.println("FST: " + fst.getNodeCount() + " nodes");

                /*
                 * PrintWriter pw = new PrintWriter("/x/tmp/out.dot");
                 * Util.toDot(fst, pw, true, true);
                 * pw.close();
                 */

                success = true;
            }
            finally
            {
                try
                {
                    if (success)
                    {
                        IOUtils.Close(writer, reader);
                    }
                    else
                    {
                        IOUtils.CloseWhileHandlingException(writer, reader);
                    }
                }
                finally
                {
                    try
                    {
                        // Remove every file of the temporary index, then the directory itself.
                        foreach (string file in dir.ListAll())
                        {
                            File path = new File(tempIndexPath, file);
                            if (path.Delete() == false)
                            {
                                throw new InvalidOperationException("failed to remove " + path);
                            }
                        }

                        if (tempIndexPath.Delete() == false)
                        {
                            throw new InvalidOperationException("failed to remove " + tempIndexPath);
                        }
                    }
                    finally
                    {
                        // Dispose the directory even when listing or deleting the temporary
                        // files fails; otherwise a cleanup failure would leak it.
                        dir.Dispose();
                    }
                }
            }
        }