/// <summary>
/// Seeks to exactly <paramref name="target"/>. When a previous seek left
/// state behind, the prefix shared between the current term and
/// <paramref name="target"/> is re-used instead of walking the index from
/// the root again.
/// </summary>
/// <param name="target">The term to look up.</param>
/// <returns>
/// <c>true</c> if the target equals the current (existing) term or the block
/// scan reports <see cref="SeekStatus.FOUND"/>; otherwise <c>false</c>.
/// </returns>
/// <exception cref="InvalidOperationException">The terms index was not loaded.</exception>
public override bool SeekExact(BytesRef target)
{
    if (OuterInstance.Index == null)
    {
        throw new InvalidOperationException("terms index was not loaded");
    }

    // The term buffer must be strictly longer than the target.
    if (Term_Renamed.Bytes.Length <= target.Length)
    {
        Term_Renamed.Bytes = ArrayUtil.Grow(Term_Renamed.Bytes, 1 + target.Length);
    }

    Debug.Assert(ClearEOF());

    FST<BytesRef>.Arc<BytesRef> curArc;
    int matched;
    BytesRef prefixOutput;

    TargetBeforeCurrentLength = CurrentFrame.Ord;

    if (CurrentFrame == StaticFrame)
    {
        // No seek state yet: start from the root of the index.
        TargetBeforeCurrentLength = -1;
        curArc = OuterInstance.Index.GetFirstArc(Arcs[0]);

        // Empty string prefix must have an output (block) in the index!
        Debug.Assert(curArc.Final);
        Debug.Assert(curArc.Output != null);

        prefixOutput = curArc.Output;
        CurrentFrame = StaticFrame;
        matched = 0;
        CurrentFrame = PushFrame(curArc, OuterInstance.OuterInstance.FstOutputs.Add(prefixOutput, curArc.NextFinalOutput), 0);
    }
    else
    {
        // We are already seek'd; find the common prefix of the new seek
        // term vs the current term and re-use the corresponding seek
        // state. For example, if the app first seeks to foobar, then
        // seeks to foobaz, we can re-use the seek state for the first
        // 5 bytes.
        curArc = Arcs[0];
        Debug.Assert(curArc.Final);
        prefixOutput = curArc.Output;
        matched = 0;

        Frame sharedFrame = Stack[0];
        Debug.Assert(ValidIndexPrefix <= Term_Renamed.Length);

        int indexedLimit = Math.Min(target.Length, ValidIndexPrefix);
        int order = 0;

        // TODO: reverse vLong byte order for better FST
        // prefix output sharing

        // First compare up to valid seek frames:
        for (; matched < indexedLimit; matched++)
        {
            order = (Term_Renamed.Bytes[matched] & 0xFF) - (target.Bytes[target.Offset + matched] & 0xFF);
            if (order != 0)
            {
                break;
            }

            curArc = Arcs[1 + matched];
            Debug.Assert(curArc.Label == (target.Bytes[target.Offset + matched] & 0xFF), "arc.label=" + (char)curArc.Label + " targetLabel=" + (char)(target.Bytes[target.Offset + matched] & 0xFF));

            if (curArc.Output != OuterInstance.OuterInstance.NO_OUTPUT)
            {
                prefixOutput = OuterInstance.OuterInstance.FstOutputs.Add(prefixOutput, curArc.Output);
            }

            if (curArc.Final)
            {
                sharedFrame = Stack[1 + sharedFrame.Ord];
            }
        }

        if (order == 0)
        {
            // Second compare the rest of the term, but don't save
            // arc/output/frame; this only determines whether the target
            // is before, equal to, or after the current term. A separate
            // index is used so that "matched" keeps its value.
            int tailLimit = Math.Min(target.Length, Term_Renamed.Length);
            for (int i = matched; i < tailLimit; i++)
            {
                order = (Term_Renamed.Bytes[i] & 0xFF) - (target.Bytes[target.Offset + i] & 0xFF);
                if (order != 0)
                {
                    break;
                }
            }

            if (order == 0)
            {
                order = Term_Renamed.Length - target.Length;
            }
        }

        if (order == 0)
        {
            // Target is exactly the same as current term
            Debug.Assert(Term_Renamed.Length == target.Length);
            if (TermExists)
            {
                return true;
            }
            // Same bytes but the term doesn't exist: fall through to the
            // index walk below, leaving CurrentFrame as it is.
        }
        else if (order > 0)
        {
            // Uncommon case: target term is before current term; this
            // means we can keep the frame but we must rewind it (so we
            // scan from the start)
            TargetBeforeCurrentLength = 0;
            CurrentFrame = sharedFrame;
            CurrentFrame.Rewind();
        }
        else
        {
            // Common case: target term is after current term, ie, app is
            // seeking multiple terms in sorted order
            CurrentFrame = sharedFrame;
        }
    }

    // Walk the index one target label at a time.
    while (matched < target.Length)
    {
        int label = target.Bytes[target.Offset + matched] & 0xFF;

        FST<BytesRef>.Arc<BytesRef> followed = OuterInstance.Index.FindTargetArc(label, curArc, GetArc(1 + matched), FstReader);

        if (followed == null)
        {
            // Index is exhausted
            ValidIndexPrefix = CurrentFrame.Prefix;

            CurrentFrame.ScanToFloorFrame(target);

            if (!CurrentFrame.HasTerms)
            {
                // Fast not-found: record the label we failed on.
                TermExists = false;
                Term_Renamed.Bytes[matched] = (byte)label;
                Term_Renamed.Length = 1 + matched;
                return false;
            }

            CurrentFrame.LoadBlock();

            SeekStatus status = CurrentFrame.ScanToTerm(target, true);
            return status == SeekStatus.FOUND;
        }

        // Follow this arc
        curArc = followed;
        Term_Renamed.Bytes[matched] = (byte)label;

        // Aggregate output as we go:
        Debug.Assert(curArc.Output != null);
        if (curArc.Output != OuterInstance.OuterInstance.NO_OUTPUT)
        {
            prefixOutput = OuterInstance.OuterInstance.FstOutputs.Add(prefixOutput, curArc.Output);
        }

        matched++;

        if (curArc.Final)
        {
            CurrentFrame = PushFrame(curArc, OuterInstance.OuterInstance.FstOutputs.Add(prefixOutput, curArc.NextFinalOutput), matched);
        }
    }

    ValidIndexPrefix = CurrentFrame.Prefix;

    CurrentFrame.ScanToFloorFrame(target);

    // Target term is entirely contained in the index:
    if (!CurrentFrame.HasTerms)
    {
        TermExists = false;
        Term_Renamed.Length = matched;
        return false;
    }

    CurrentFrame.LoadBlock();

    SeekStatus finalStatus = CurrentFrame.ScanToTerm(target, true);
    return finalStatus == SeekStatus.FOUND;
}
/// <summary>
/// Seeks toward <paramref name="target"/> with a non-exact block scan. When
/// a previous seek left state behind, the prefix shared between the current
/// term and <paramref name="target"/> is re-used instead of walking the
/// index from the root again.
/// </summary>
/// <param name="target">The term to seek to.</param>
/// <returns>
/// <see cref="SeekStatus.FOUND"/> if the target equals the current
/// (existing) term; otherwise the status reported by the block scan, except
/// that when the scan reports <see cref="SeekStatus.END"/> the enum is
/// advanced with <c>Next()</c> and <see cref="SeekStatus.NOT_FOUND"/> is
/// returned if that yields a term, <see cref="SeekStatus.END"/> if not.
/// </returns>
/// <exception cref="InvalidOperationException">The terms index was not loaded.</exception>
public override SeekStatus SeekCeil(BytesRef target)
{
    if (OuterInstance.Index == null)
    {
        throw new InvalidOperationException("terms index was not loaded");
    }

    // The term buffer must be strictly longer than the target.
    if (Term_Renamed.Bytes.Length <= target.Length)
    {
        Term_Renamed.Bytes = ArrayUtil.Grow(Term_Renamed.Bytes, 1 + target.Length);
    }

    Debug.Assert(ClearEOF());

    FST<BytesRef>.Arc<BytesRef> curArc;
    int matched;
    BytesRef prefixOutput;

    TargetBeforeCurrentLength = CurrentFrame.Ord;

    if (CurrentFrame == StaticFrame)
    {
        // No seek state yet: start from the root of the index.
        TargetBeforeCurrentLength = -1;
        curArc = OuterInstance.Index.GetFirstArc(Arcs[0]);

        // Empty string prefix must have an output (block) in the index!
        Debug.Assert(curArc.Final);
        Debug.Assert(curArc.Output != null);

        prefixOutput = curArc.Output;
        CurrentFrame = StaticFrame;
        matched = 0;
        CurrentFrame = PushFrame(curArc, OuterInstance.OuterInstance.FstOutputs.Add(prefixOutput, curArc.NextFinalOutput), 0);
    }
    else
    {
        // We are already seek'd; find the common prefix of the new seek
        // term vs the current term and re-use the corresponding seek
        // state. For example, if the app first seeks to foobar, then
        // seeks to foobaz, we can re-use the seek state for the first
        // 5 bytes.
        curArc = Arcs[0];
        Debug.Assert(curArc.Final);
        prefixOutput = curArc.Output;
        matched = 0;

        Frame sharedFrame = Stack[0];
        Debug.Assert(ValidIndexPrefix <= Term_Renamed.Length);

        int indexedLimit = Math.Min(target.Length, ValidIndexPrefix);
        int order = 0;

        // TODO: we should write our vLong backwards (MSB
        // first) to get better sharing from the FST

        // First compare up to valid seek frames:
        for (; matched < indexedLimit; matched++)
        {
            order = (Term_Renamed.Bytes[matched] & 0xFF) - (target.Bytes[target.Offset + matched] & 0xFF);
            if (order != 0)
            {
                break;
            }

            curArc = Arcs[1 + matched];
            Debug.Assert(curArc.Label == (target.Bytes[target.Offset + matched] & 0xFF), "arc.label=" + (char)curArc.Label + " targetLabel=" + (char)(target.Bytes[target.Offset + matched] & 0xFF));

            // TODO: we could save the outputs in local
            // byte[][] instead of making new objs every
            // seek; but, often the FST doesn't have any
            // shared bytes (but this could change if we
            // reverse vLong byte order)
            if (curArc.Output != OuterInstance.OuterInstance.NO_OUTPUT)
            {
                prefixOutput = OuterInstance.OuterInstance.FstOutputs.Add(prefixOutput, curArc.Output);
            }

            if (curArc.Final)
            {
                sharedFrame = Stack[1 + sharedFrame.Ord];
            }
        }

        if (order == 0)
        {
            // Second compare the rest of the term, but don't save
            // arc/output/frame. A separate index is used so that
            // "matched" keeps its value.
            int tailLimit = Math.Min(target.Length, Term_Renamed.Length);
            for (int i = matched; i < tailLimit; i++)
            {
                order = (Term_Renamed.Bytes[i] & 0xFF) - (target.Bytes[target.Offset + i] & 0xFF);
                if (order != 0)
                {
                    break;
                }
            }

            if (order == 0)
            {
                order = Term_Renamed.Length - target.Length;
            }
        }

        if (order == 0)
        {
            // Target is exactly the same as current term
            Debug.Assert(Term_Renamed.Length == target.Length);
            if (TermExists)
            {
                return SeekStatus.FOUND;
            }
            // Same bytes but the term doesn't exist: fall through to the
            // index walk below, leaving CurrentFrame as it is.
        }
        else if (order > 0)
        {
            // Uncommon case: target term is before current term; this
            // means we can keep the frame but we must rewind it (so we
            // scan from the start)
            TargetBeforeCurrentLength = 0;
            CurrentFrame = sharedFrame;
            CurrentFrame.Rewind();
        }
        else
        {
            // Common case: target term is after current term, ie, app is
            // seeking multiple terms in sorted order
            CurrentFrame = sharedFrame;
        }
    }

    // Walk the index one target label at a time.
    while (matched < target.Length)
    {
        int label = target.Bytes[target.Offset + matched] & 0xFF;

        FST<BytesRef>.Arc<BytesRef> followed = OuterInstance.Index.FindTargetArc(label, curArc, GetArc(1 + matched), FstReader);

        if (followed == null)
        {
            // Index is exhausted
            ValidIndexPrefix = CurrentFrame.Prefix;

            CurrentFrame.ScanToFloorFrame(target);
            CurrentFrame.LoadBlock();

            SeekStatus status = CurrentFrame.ScanToTerm(target, false);
            if (status != SeekStatus.END)
            {
                return status;
            }

            // The scan reported END: copy the target into the term
            // and step forward with Next().
            Term_Renamed.CopyBytes(target);
            TermExists = false;

            if (Next() != null)
            {
                return SeekStatus.NOT_FOUND;
            }
            return SeekStatus.END;
        }

        // Follow this arc
        Term_Renamed.Bytes[matched] = (byte)label;
        curArc = followed;

        // Aggregate output as we go:
        Debug.Assert(curArc.Output != null);
        if (curArc.Output != OuterInstance.OuterInstance.NO_OUTPUT)
        {
            prefixOutput = OuterInstance.OuterInstance.FstOutputs.Add(prefixOutput, curArc.Output);
        }

        matched++;

        if (curArc.Final)
        {
            CurrentFrame = PushFrame(curArc, OuterInstance.OuterInstance.FstOutputs.Add(prefixOutput, curArc.NextFinalOutput), matched);
        }
    }

    ValidIndexPrefix = CurrentFrame.Prefix;

    CurrentFrame.ScanToFloorFrame(target);
    CurrentFrame.LoadBlock();

    SeekStatus finalStatus = CurrentFrame.ScanToTerm(target, false);
    if (finalStatus != SeekStatus.END)
    {
        return finalStatus;
    }

    // The scan reported END: copy the target into the term and step
    // forward with Next().
    Term_Renamed.CopyBytes(target);
    TermExists = false;

    if (Next() != null)
    {
        return SeekStatus.NOT_FOUND;
    }
    return SeekStatus.END;
}
/// <summary>
/// Walks every block of the terms dict, feeding each block and each term to
/// a <see cref="Stats"/> accumulator, then puts the root frame back before
/// returning.
/// </summary>
/// <returns>The populated statistics object.</returns>
public Stats ComputeBlockStats()
{
    Stats stats = new Stats(OuterInstance.OuterInstance.Segment, OuterInstance.fieldInfo.Name);

    // Index-level numbers are only available when an index is present.
    if (OuterInstance.Index != null)
    {
        stats.IndexNodeCount = OuterInstance.Index.NodeCount;
        stats.IndexArcCount = OuterInstance.Index.ArcCount;
        stats.IndexNumBytes = OuterInstance.Index.SizeInBytes();
    }

    CurrentFrame = StaticFrame;

    FST<BytesRef>.Arc<BytesRef> rootArc;
    if (OuterInstance.Index == null)
    {
        rootArc = null;
    }
    else
    {
        rootArc = OuterInstance.Index.GetFirstArc(Arcs[0]);
        // Empty string prefix must have an output in the index!
        Debug.Assert(rootArc.Final);
    }

    // Push and load the root block.
    CurrentFrame = PushFrame(rootArc, OuterInstance.RootCode, 0);
    CurrentFrame.FpOrig = CurrentFrame.Fp;
    CurrentFrame.LoadBlock();
    ValidIndexPrefix = 0;

    stats.StartBlock(CurrentFrame, !(CurrentFrame.IsLastInFloor));

    bool allBlocksDone = false;
    while (true)
    {
        // Pop finished blocks
        while (CurrentFrame.NextEnt == CurrentFrame.EntCount)
        {
            stats.EndBlock(CurrentFrame);

            if (CurrentFrame.IsLastInFloor)
            {
                if (CurrentFrame.Ord == 0)
                {
                    // The frame at ord 0 is finished: the walk is complete.
                    allBlocksDone = true;
                    break;
                }

                long poppedFp = CurrentFrame.FpOrig;
                CurrentFrame = Stack[CurrentFrame.Ord - 1];
                Debug.Assert(poppedFp == CurrentFrame.LastSubFP);
            }
            else
            {
                CurrentFrame.LoadNextFloorBlock();
                stats.StartBlock(CurrentFrame, true);
            }
        }

        if (allBlocksDone)
        {
            break;
        }

        // Each true result from Next() pushes a new block; the first
        // false result is recorded as a term.
        while (CurrentFrame.Next())
        {
            // Push to new block:
            CurrentFrame = PushFrame(null, CurrentFrame.LastSubFP, Term_Renamed.Length);
            CurrentFrame.FpOrig = CurrentFrame.Fp;

            // this is a "next" frame -- even if it's floor'd we must
            // pretend it isn't so we don't try to scan to the right
            // floor frame:
            CurrentFrame.IsFloor = false;

            CurrentFrame.LoadBlock();
            stats.StartBlock(CurrentFrame, !CurrentFrame.IsLastInFloor);
        }

        stats.Term(Term_Renamed);
    }

    stats.Finish();

    // Put root frame back:
    CurrentFrame = StaticFrame;
    if (OuterInstance.Index == null)
    {
        rootArc = null;
    }
    else
    {
        rootArc = OuterInstance.Index.GetFirstArc(Arcs[0]);
        // Empty string prefix must have an output in the index!
        Debug.Assert(rootArc.Final);
    }

    CurrentFrame = PushFrame(rootArc, OuterInstance.RootCode, 0);
    CurrentFrame.Rewind();
    CurrentFrame.LoadBlock();
    ValidIndexPrefix = 0;
    Term_Renamed.Length = 0;

    return stats;
}