A block-based terms index and dictionary that assigns terms to variable-length blocks according to how they share prefixes. The terms index is a prefix trie whose leaves are term blocks. The advantage of this approach is that seekExact is often able to determine that a term cannot exist without doing any IO, and intersection with Automata is very fast. Note that this terms dictionary has its own fixed terms index (i.e., it does not support a pluggable terms index implementation).

NOTE: this terms dictionary does not support index divisor when opening an IndexReader. Instead, you can change the min/maxItemsPerBlock during indexing.

The data structure used by this implementation is very similar to a burst trie (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499), but with added logic to break up too-large blocks of all terms sharing a given prefix into smaller ones.

Use Lucene.Net.Index.CheckIndex with the -verbose option to see summary statistics on the blocks in the dictionary. See BlockTreeTermsWriter. @lucene.experimental

Inheritance: Lucene.Net.Codecs.FieldsProducer
 /// <summary>
 /// Creates the frame stored at position <paramref name="ord"/> of the intersect
 /// enumerator's frame stack.
 /// </summary>
 /// <param name="outerInstance">Enumerator that owns this frame.</param>
 /// <param name="ord">Ordinal recorded on the frame.</param>
 public Frame(BlockTreeTermsReader.FieldReader.IntersectEnum outerInstance, int ord)
 {
     this.OuterInstance = outerInstance;
     this.Ord = ord;

     var fieldReader = outerInstance.OuterInstance;

     // Ask the postings reader for a fresh term state and start its
     // total term frequency at -1.
     var state = fieldReader.OuterInstance.PostingsReader.NewTermState();
     state.TotalTermFreq = -1;
     this.TermState = state;

     // Scratch array sized by the field reader's LongsSize.
     this.Longs = new long[fieldReader.LongsSize];
 }
                /// <summary>
                /// Creates a terms enumerator for <paramref name="outerInstance"/>.
                /// No block is pushed or loaded here: the enumerator starts on its static
                /// frame with <c>ValidIndexPrefix</c> set to 0.
                /// </summary>
                /// <param name="outerInstance">
                /// Field reader that owns this enumerator. Its <c>Index</c> may be null, in
                /// which case no FST reader is created and no arc is read.
                /// </param>
                public SegmentTermsEnum(BlockTreeTermsReader.FieldReader outerInstance)
                {
                    this.OuterInstance = outerInstance;
                    //if (DEBUG) System.out.println("BTTR.init seg=" + segment);
                    Stack = new Frame[0];

                    // Used to hold seek by TermState, or cached seek
                    StaticFrame = new Frame(this, -1);

                    // Only create an FST reader when a terms index is present.
                    FstReader = outerInstance.Index == null ? null : outerInstance.Index.BytesReader;

                    // Init w/ root block; don't use index since it may
                    // not (and need not) have been loaded
                    for (int i = 0; i < Arcs.Length; i++)
                    {
                        Arcs[i] = new FST<BytesRef>.Arc<BytesRef>();
                    }

                    if (outerInstance.Index != null)
                    {
                        // The call is kept even though the result is only inspected by the
                        // assert: it is handed Arcs[0], which it presumably fills in -- confirm.
                        FST<BytesRef>.Arc<BytesRef> rootArc = outerInstance.Index.GetFirstArc(Arcs[0]);
                        // Empty string prefix must have an output in the index!
                        Debug.Assert(rootArc.Final);
                    }

                    // Start on the static frame; pushing/loading the root block is
                    // intentionally disabled (see the commented-out calls below).
                    CurrentFrame = StaticFrame;
                    //currentFrame = pushFrame(arc, rootCode, 0);
                    //currentFrame.loadBlock();
                    ValidIndexPrefix = 0;
                    // if (DEBUG) {
                    //   System.out.println("init frame state " + currentFrame.ord);
                    //   printSeekState();
                    // }

                    //System.out.println();
                    // computeBlockStats().print(System.out);
                }
            //private boolean DEBUG;

            /// <summary>
            /// Creates the reader for one field: stores the field statistics, decodes the
            /// root block file pointer from <paramref name="rootCode"/>, and loads the FST
            /// terms index when <paramref name="indexIn"/> is supplied.
            /// </summary>
            /// <param name="outerInstance">Terms reader that owns this field reader.</param>
            /// <param name="fieldInfo">Field this reader serves.</param>
            /// <param name="numTerms">Number of terms in the field; asserted to be positive.</param>
            /// <param name="rootCode">Bytes holding the VLong-encoded root block code.</param>
            /// <param name="sumTotalTermFreq">Stored as the field's sum of total term frequencies.</param>
            /// <param name="sumDocFreq">Stored as the field's sum of document frequencies.</param>
            /// <param name="docCount">Stored as the field's document count.</param>
            /// <param name="indexStartFP">Position in <paramref name="indexIn"/> where this field's FST starts.</param>
            /// <param name="longsSize">Stored as <c>LongsSize</c>.</param>
            /// <param name="indexIn">Terms index input, or null to leave <c>Index</c> null.</param>
            internal FieldReader(BlockTreeTermsReader outerInstance, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize, IndexInput indexIn)
            {
                this.OuterInstance = outerInstance;
                Debug.Assert(numTerms > 0);
                this.fieldInfo = fieldInfo;
                //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
                this.NumTerms = numTerms;
                this.SumTotalTermFreq_Renamed = sumTotalTermFreq;
                this.SumDocFreq_Renamed = sumDocFreq;
                this.DocCount_Renamed = docCount;
                this.IndexStartFP = indexStartFP;
                this.RootCode = rootCode;
                this.LongsSize = longsSize;
                // if (DEBUG) {
                //   System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
                // }

                // Decode the root code and shift out its low OUTPUT_FLAGS_NUM_BITS bits.
                // The shift is done on the full 64-bit value (ulong gives a logical shift):
                // narrowing to 32 bits first would corrupt any root code above 2^32.
                // NOTE(review): assumes RootBlockFP is declared as long -- confirm.
                RootBlockFP = (long)((ulong)(new ByteArrayDataInput((byte[])(Array)rootCode.Bytes, rootCode.Offset, rootCode.Length)).ReadVLong() >> BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS);

                if (indexIn != null)
                {
                    // Work on a private clone so seeking does not disturb the caller's input.
                    IndexInput clone = (IndexInput)indexIn.Clone();
                    //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
                    clone.Seek(indexStartFP);
                    Index = new FST<BytesRef>(clone, ByteSequenceOutputs.Singleton);

                    /*
                    if (false) {
                      final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
                      Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
                      Util.toDot(index, w, false, false);
                      System.out.println("FST INDEX: SAVED to " + dotFileName);
                      w.close();
                    }
                    */
                }
                else
                {
                    Index = null;
                }
            }
                // TODO: in some cases we can filter by length?  eg
                // regexp foo*bar must be at least length 6 bytes
                /// <summary>
                /// Creates an enumerator over the terms of <paramref name="outerInstance"/>
                /// accepted by <paramref name="compiled"/>. The root block is loaded into the
                /// first frame, and the enumerator is optionally positioned with
                /// <paramref name="startTerm"/>.
                /// </summary>
                /// <param name="outerInstance">Field reader that owns this enumerator; its terms index must be loaded.</param>
                /// <param name="compiled">Compiled automaton whose run automaton drives the intersection.</param>
                /// <param name="startTerm">Term passed to <c>SeekToStartTerm</c>, or null to skip seeking.</param>
                /// <exception cref="InvalidOperationException">
                /// The field reader has no terms index (<c>Index</c> is null).
                /// </exception>
                public IntersectEnum(BlockTreeTermsReader.FieldReader outerInstance, CompiledAutomaton compiled, BytesRef startTerm)
                {
                    // The root frame is built from the first arc of the terms index, so the
                    // index is mandatory here. Fail with a clear error before doing any work
                    // instead of hitting a NullReferenceException part-way through setup.
                    if (outerInstance.Index == null)
                    {
                        throw new InvalidOperationException("terms index was not loaded");
                    }

                    this.OuterInstance = outerInstance;
                    // if (DEBUG) {
                    //   System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" + brToString(compiled.commonSuffixRef));
                    // }
                    runAutomaton = compiled.RunAutomaton;
                    CompiledAutomaton = compiled;

                    // NOTE(review): this line was mangled by e-mail obfuscation in the text
                    // under review ("[email protected]"); reconstructed as a clone of the
                    // owning terms reader's input -- confirm against the real file.
                    @in = (IndexInput)outerInstance.OuterInstance.@in.Clone();

                    // Pre-allocate five frames and one arc per Arcs slot.
                    Stack = new Frame[5];
                    for (int idx = 0; idx < Stack.Length; idx++)
                    {
                        Stack[idx] = new Frame(this, idx);
                    }
                    for (int arcIdx = 0; arcIdx < Arcs.Length; arcIdx++)
                    {
                        Arcs[arcIdx] = new FST<BytesRef>.Arc<BytesRef>();
                    }

                    FstReader = outerInstance.Index.BytesReader;

                    // TODO: if the automaton is "smallish" we really
                    // should use the terms index to seek at least to
                    // the initial term and likely to subsequent terms
                    // (or, maybe just fallback to ATE for such cases).
                    // Else the seek cost of loading the frames will be
                    // too costly.

                    FST<BytesRef>.Arc<BytesRef> arc = outerInstance.Index.GetFirstArc(Arcs[0]);
                    // Empty string prefix must have an output in the index!
                    Debug.Assert(arc.Final);

                    // Special pushFrame since it's the first one:
                    Frame f = Stack[0];
                    f.Fp = f.FpOrig = outerInstance.RootBlockFP;
                    f.Prefix = 0;
                    f.State = runAutomaton.InitialState;
                    f.Arc = arc;
                    f.OutputPrefix = arc.Output;
                    f.Load(outerInstance.RootCode);

                    // for assert: Debug.Assert calls are dropped from non-DEBUG builds,
                    // so SetSavedStartTerm only runs when assertions are compiled in.
                    Debug.Assert(SetSavedStartTerm(startTerm));

                    CurrentFrame = f;
                    if (startTerm != null)
                    {
                        SeekToStartTerm(startTerm);
                    }
                }