/// <summary>
/// Creates an intersect enum over the terms of <paramref name="outerInstance"/>,
/// driven by the run automaton of <paramref name="compiled"/>.
/// <para/>
/// The state stack is seeded with one frame for the automaton's initial state.
/// When <paramref name="startTerm"/> is non-null, the constructor walks its bytes
/// label by label, using the per-term skip lists to jump over terms that cannot
/// match and pushing one state frame per consumed skip entry, and finally sets
/// <c>termOrd</c>/<c>stateUpto</c> relative to <paramref name="startTerm"/>.
/// </summary>
/// <param name="outerInstance">Field whose <c>terms</c>, <c>termOffsets</c>,
/// <c>termBytes</c>, <c>skips</c> and <c>skipOffsets</c> arrays are scanned.</param>
/// <param name="compiled">Compiled automaton supplying <c>RunAutomaton</c> and
/// <c>SortedTransitions</c>.</param>
/// <param name="startTerm">Optional term to position against; <c>null</c> leaves
/// <c>termOrd</c> at -1. The automaton is required to accept it (asserted only).</param>
public DirectIntersectTermsEnum(DirectPostingsFormat.DirectField outerInstance, CompiledAutomaton compiled, BytesRef startTerm)
{
    this.outerInstance = outerInstance;
    runAutomaton = compiled.RunAutomaton;
    compiledAutomaton = compiled;
    termOrd = -1;

    // Root frame: automaton initial state; its changeOrd is the term count.
    states = new State[1];
    states[0] = new State(this);
    states[0].changeOrd = outerInstance.terms.Length;
    states[0].state = runAutomaton.InitialState;
    states[0].transitions = compiledAutomaton.SortedTransitions[states[0].state];
    states[0].transitionUpto = -1;
    states[0].transitionMax = -1;

    if (startTerm != null)
    {
        int skipUpto = 0;
        if (startTerm.Length == 0)
        {
            // Empty start term: it can only equal the first term, and only if
            // that term has zero length (termOffsets[1] == 0).
            if (outerInstance.terms.Length > 0 && outerInstance.termOffsets[1] == 0)
            {
                termOrd = 0;
            }
        }
        else
        {
            termOrd++;

            for (int i = 0; i < startTerm.Length; i++)
            {
                int label = startTerm.Bytes[startTerm.Offset + i] & 0xFF;

                // Advance this frame's transition cursor until its range reaches the label.
                while (label > states[i].transitionMax)
                {
                    states[i].transitionUpto++;
                    Debug.Assert(states[i].transitionUpto < states[i].transitions.Length);
                    states[i].transitionMin = states[i].transitions[states[i].transitionUpto].Min;
                    states[i].transitionMax = states[i].transitions[states[i].transitionUpto].Max;
                    Debug.Assert(states[i].transitionMin >= 0);
                    Debug.Assert(states[i].transitionMin <= 255);
                    Debug.Assert(states[i].transitionMax >= 0);
                    Debug.Assert(states[i].transitionMax <= 255);
                }

                // Skip forwards until we find a term matching
                // the label at this position:
                while (termOrd < outerInstance.terms.Length)
                {
                    int skipOffset = outerInstance.skipOffsets[termOrd];
                    int numSkips = outerInstance.skipOffsets[termOrd + 1] - skipOffset;
                    int termOffset_i = outerInstance.termOffsets[termOrd];
                    int termLength = outerInstance.termOffsets[1 + termOrd] - termOffset_i;

                    if (termOrd == states[stateUpto].changeOrd)
                    {
                        // Hit the current frame's changeOrd: pop it and step back one term.
                        stateUpto--;
                        termOrd--;
                        return;
                    }

                    if (termLength == i)
                    {
                        // Term too short to have a byte at position i; try the next term.
                        termOrd++;
                        skipUpto = 0;
                    }
                    else if (label < (outerInstance.termBytes[termOffset_i + i] & 0xFF))
                    {
                        // No match and this term is already beyond the label: stop here.
                        termOrd--;
                        stateUpto -= skipUpto;
                        Debug.Assert(stateUpto >= 0);
                        return;
                    }
                    else if (label == (outerInstance.termBytes[termOffset_i + i] & 0xFF))
                    {
                        if (skipUpto < numSkips)
                        {
                            Grow();

                            int nextState = runAutomaton.Step(states[stateUpto].state, label);

                            // Automaton is required to accept startTerm:
                            Debug.Assert(nextState != -1);

                            // Push a frame for the matched label, bounded by the next skip entry.
                            stateUpto++;
                            states[stateUpto].changeOrd = outerInstance.skips[skipOffset + skipUpto++];
                            states[stateUpto].state = nextState;
                            states[stateUpto].transitions = compiledAutomaton.SortedTransitions[nextState];
                            states[stateUpto].transitionUpto = -1;
                            states[stateUpto].transitionMax = -1;

                            // Match next label at this same term:
                            goto nextLabelContinue;
                        }
                        else
                        {
                            // Index exhausted: just scan now (the
                            // number of scans required will be less
                            // than the minSkipCount):
                            int startTermOrd = termOrd;
                            while (termOrd < outerInstance.terms.Length && outerInstance.Compare(termOrd, startTerm) <= 0)
                            {
                                Debug.Assert(termOrd == startTermOrd || outerInstance.skipOffsets[termOrd] == outerInstance.skipOffsets[termOrd + 1]);
                                termOrd++;
                            }
                            Debug.Assert(termOrd - startTermOrd < outerInstance.minSkipCount);
                            termOrd--;
                            stateUpto -= skipUpto;
                            return;
                        }
                    }
                    else
                    {
                        // Label is greater than this term's byte: jump via the skip
                        // entry if one remains, otherwise move to the next term.
                        if (skipUpto < numSkips)
                        {
                            termOrd = outerInstance.skips[skipOffset + skipUpto];
                        }
                        else
                        {
                            termOrd++;
                        }
                        skipUpto = 0;
                    }
                }

                // startTerm is >= last term so enum will not
                // return any terms:
                termOrd--;
                return;

            nextLabelContinue: ;
            }
        }

        // Only inspect the term when termOrd is a valid ordinal. With an empty
        // startTerm and no matching empty first term, termOrd is still -1 here and
        // indexing termOffsets with it would throw IndexOutOfRangeException.
        if (termOrd >= 0)
        {
            int termOffset = outerInstance.termOffsets[termOrd];
            int termLen = outerInstance.termOffsets[1 + termOrd] - termOffset;

            if (!startTerm.Equals(new BytesRef(outerInstance.termBytes, termOffset, termLen)))
            {
                // The term we landed on is not startTerm itself: unwind the frames
                // pushed for it and step back one term.
                stateUpto -= skipUpto;
                termOrd--;
            }
        }
    }
}
// TODO: in some cases we can filter by length?  eg
// regexp foo*bar must be at least length 6 bytes
/// <summary>
/// Creates an intersect enum over the block-tree terms of
/// <paramref name="outerInstance"/>, driven by the run automaton of
/// <paramref name="compiled"/>. Pre-allocates the frame stack and FST arcs, loads
/// the root block into the first frame, and, when <paramref name="startTerm"/> is
/// non-null, calls <c>SeekToStartTerm</c> with it.
/// </summary>
/// <param name="outerInstance">Field reader supplying <c>Index</c>,
/// <c>RootBlockFP</c> and <c>RootCode</c>.</param>
/// <param name="compiled">Compiled automaton supplying <c>RunAutomaton</c>.</param>
/// <param name="startTerm">Optional term to seek to; may be <c>null</c>.</param>
public IntersectEnum(BlockTreeTermsReader.FieldReader outerInstance, CompiledAutomaton compiled, BytesRef startTerm)
{
    this.OuterInstance = outerInstance;
    runAutomaton = compiled.RunAutomaton;
    CompiledAutomaton = compiled;

    // NOTE(review): this expression was garbled in the source text; it is
    // reconstructed as a private clone of the enclosing reader's input.
    // Confirm the member path (outerInstance.OuterInstance.@in) against FieldReader.
    @in = (IndexInput)outerInstance.OuterInstance.@in.Clone();

    Stack = new Frame[5];
    for (int idx = 0; idx < Stack.Length; idx++)
    {
        Stack[idx] = new Frame(this, idx);
    }
    for (int arcIdx = 0; arcIdx < Arcs.Length; arcIdx++)
    {
        Arcs[arcIdx] = new FST<BytesRef>.Arc<BytesRef>();
    }

    if (outerInstance.Index == null)
    {
        FstReader = null;
    }
    else
    {
        FstReader = outerInstance.Index.BytesReader;
    }

    // TODO: if the automaton is "smallish" we really
    // should use the terms index to seek at least to
    // the initial term and likely to subsequent terms
    // (or, maybe just fallback to ATE for such cases).
    // Else the seek cost of loading the frames will be
    // too costly.

    // NOTE(review): Index is null-checked above but dereferenced here
    // unconditionally; confirm this constructor is never reached without an index.
    FST<BytesRef>.Arc<BytesRef> arc = outerInstance.Index.GetFirstArc(Arcs[0]);

    // Empty string prefix must have an output in the index!
    Debug.Assert(arc.Final);

    // Special pushFrame since it's the first one:
    Frame f = Stack[0];
    f.Fp = f.FpOrig = outerInstance.RootBlockFP;
    f.Prefix = 0;
    f.State = runAutomaton.InitialState;
    f.Arc = arc;
    f.OutputPrefix = arc.Output;
    f.Load(outerInstance.RootCode);

    // for assert: the call sits inside Debug.Assert on purpose, so it is
    // only evaluated in builds where Debug.Assert is compiled in.
    Debug.Assert(SetSavedStartTerm(startTerm));

    CurrentFrame = f;
    if (startTerm != null)
    {
        SeekToStartTerm(startTerm);
    }
}