/// <summary>
/// Adopts <paramref name="otherState"/> for <paramref name="target"/>: copies it into
/// <c>state</c>, stores a deep copy of <paramref name="target"/> as the current term and
/// marks a seek as pending. Nothing happens when <paramref name="target"/> already equals
/// the current term.
/// </summary>
/// <param name="target">Term to move to.</param>
/// <param name="otherState">Term state to copy into this enum's own state.</param>
public override void SeekExact(BytesRef target, TermState otherState)
{
    // Already on the requested term: keep the existing state untouched.
    if (target.Equals(term_Renamed))
    {
        return;
    }

    state.CopyFrom(otherState);
    term_Renamed = BytesRef.DeepCopyOf(target);
    seekPending = true;
}
/// <summary>
/// Reads the live-docs file of the given segment generation and returns it as a
/// <c>SimpleTextBits</c>. The file is expected to start with a <c>SIZE</c> line, followed by
/// one <c>DOC</c> line per set bit, an <c>END</c> line and a footer.
/// </summary>
/// <param name="dir">Directory the live-docs file is opened from.</param>
/// <param name="info">Segment commit info; its name and deletion generation form the file name.</param>
/// <param name="context">IO context passed to the directory when opening the file.</param>
/// <returns>The bits read from the file, wrapped together with their size.</returns>
public override Bits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
{
    Debug.Assert(info.HasDeletions());
    var line = new BytesRef();
    var utf16 = new CharsRef();
    var liveDocsFile = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);

    ChecksumIndexInput input = null;
    var completed = false;
    try
    {
        input = dir.OpenChecksumInput(liveDocsFile, context);

        // Header line: total number of bits.
        SimpleTextUtil.ReadLine(input, line);
        Debug.Assert(StringHelper.StartsWith(line, SIZE));
        var size = ParseIntAt(line, SIZE.Length, utf16);
        var bits = new BitArray(size);

        // One DOC line per set bit, terminated by END.
        for (SimpleTextUtil.ReadLine(input, line); !line.Equals(END); SimpleTextUtil.ReadLine(input, line))
        {
            Debug.Assert(StringHelper.StartsWith(line, DOC));
            var docid = ParseIntAt(line, DOC.Length, utf16);
            bits.Set(docid, true);
        }

        SimpleTextUtil.CheckFooter(input);
        completed = true;
        return new SimpleTextBits(bits, size);
    }
    finally
    {
        // On failure, close without letting a close error hide the original exception.
        if (!completed)
        {
            IOUtils.CloseWhileHandlingException(input);
        }
        else
        {
            IOUtils.Close(input);
        }
    }
}
/// <summary>
/// Scans the stored fields input once and records, for every <c>DOC</c> line, the file
/// pointer immediately after that line.
/// </summary>
/// <remarks>
/// No separate .fdx-like index is written; instead the stored fields file is read in its
/// entirety up-front and the offsets are saved so the documents can be seeked to later.
/// </remarks>
/// <param name="size">Length of the offsets array to allocate.</param>
private void ReadIndex(int size)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);
    _offsets = new long[size];

    var docsSeen = 0;
    while (!_scratch.Equals(SimpleTextStoredFieldsWriter.END))
    {
        SimpleTextUtil.ReadLine(input, _scratch);
        if (!StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.DOC))
        {
            continue;
        }

        // The pointer now sits just past the DOC line.
        _offsets[docsSeen] = input.FilePointer;
        docsSeen++;
    }

    SimpleTextUtil.CheckFooter(input);
    Debug.Assert(docsSeen == _offsets.Length);
}
/// <summary>
/// Re-reads a clone of <c>DATA</c> from position 0 through a checksumming input, line by
/// line, until the <c>END</c> marker is found, then checks the footer.
/// </summary>
public override void CheckIntegrity()
{
    var line = new BytesRef();
    var copy = (IndexInput)DATA.Clone();
    copy.Seek(0);
    ChecksumIndexInput input = new BufferedChecksumIndexInput(copy);

    // Consume every line up to and including the END marker.
    do
    {
        SimpleTextUtil.ReadLine(input, line);
    }
    while (!line.Equals(SimpleTextDocValuesWriter.END));

    SimpleTextUtil.CheckFooter(input);
}
/// <summary>
/// Scans the whole input and maps every field name found on a <c>FIELD</c> line to the file
/// pointer immediately after that line. Scanning stops at the <c>END</c> line, after which
/// the footer is checked.
/// </summary>
/// <remarks>
/// The dictionary is created with <see cref="System.StringComparer.Ordinal"/>. The default
/// string comparer is culture-sensitive, which would make the field ordering depend on the
/// culture of the running thread.
/// </remarks>
/// <param name="in">Input to scan; it is wrapped in a checksumming input.</param>
/// <returns>Field names (decoded as UTF-8) mapped to the offset following their FIELD line.</returns>
private SortedDictionary<string, long?> ReadFields(IndexInput @in)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
    var scratch = new BytesRef(10);
    var fields = new SortedDictionary<string, long?>(System.StringComparer.Ordinal);

    while (true)
    {
        SimpleTextUtil.ReadLine(input, scratch);

        if (scratch.Equals(SimpleTextFieldsWriter.END))
        {
            SimpleTextUtil.CheckFooter(input);
            return fields;
        }

        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
        {
            // The field name is everything after the FIELD prefix on this line.
            var prefixLength = SimpleTextFieldsWriter.FIELD.Length;
            var fieldName = Encoding.UTF8.GetString(scratch.Bytes, scratch.Offset + prefixLength, scratch.Length - prefixLength);
            fields[fieldName] = input.FilePointer;
        }
    }
}
/// <summary>
/// Creates the enum over the reader's FST dictionary, using the run automaton of
/// <paramref name="compiled"/>. The frame stack is pre-filled with 16 frames, a virtual
/// frame and the first frame are loaded, and the enum is then either left at the start
/// (no <paramref name="startTerm"/>) or moved with <c>DoSeekCeil</c>.
/// </summary>
/// <param name="outerInstance">Owning terms reader; supplies the FST dictionary.</param>
/// <param name="compiled">Compiled automaton whose run automaton drives the intersection.</param>
/// <param name="startTerm">Optional term to seek to; may be <c>null</c>.</param>
internal IntersectTermsEnum(FSTTermsReader.TermsReader outerInstance, CompiledAutomaton compiled, BytesRef startTerm)
    : base(outerInstance)
{
    this.outerInstance = outerInstance;
    fst = outerInstance.dict;
    fstReader = fst.BytesReader;
    fstOutputs = outerInstance.dict.Outputs;
    fsa = compiled.RunAutomaton;
    level = -1;

    stack = new Frame[16];
    for (int slot = 0; slot < stack.Length; slot++)
    {
        stack[slot] = new Frame(this);
    }

    // The virtual frame is loaded for its effect only; its return value is not used.
    LoadVirtualFrame(NewFrame());
    level++;
    PushFrame(LoadFirstFrame(NewFrame()));

    meta = null;
    metaUpto = 1;
    decoded = false;
    pending = false;

    if (startTerm == null)
    {
        pending = IsAccept(TopFrame());
        return;
    }

    DoSeekCeil(startTerm);
    // After the seek, a term is pending only if it differs from startTerm and the top frame is valid and accepted.
    pending = !startTerm.Equals(term_Renamed) && IsValid(TopFrame()) && IsAccept(TopFrame());
}
/// <summary>
/// Scans the whole input and maps every field name found on a <c>FIELD</c> line to the file
/// pointer immediately after that line. Scanning stops at the <c>END</c> line, after which
/// the footer is checked.
/// </summary>
/// <remarks>
/// The field name is decoded from the line's bytes as UTF-8. Calling <c>ToString()</c> on a
/// slice of the byte array does not decode the bytes and therefore does not yield the
/// field name. The dictionary uses <see cref="System.StringComparer.Ordinal"/> so that the
/// field ordering does not depend on the culture of the running thread.
/// </remarks>
/// <param name="in">Input to scan; it is wrapped in a checksumming input.</param>
/// <returns>Field names mapped to the offset following their FIELD line.</returns>
private SortedDictionary<string, long?> ReadFields(IndexInput @in)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
    var scratch = new BytesRef(10);
    var fields = new SortedDictionary<string, long?>(System.StringComparer.Ordinal);

    while (true)
    {
        SimpleTextUtil.ReadLine(input, scratch);

        if (scratch.Equals(SimpleTextFieldsWriter.END))
        {
            SimpleTextUtil.CheckFooter(input);
            return fields;
        }

        if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
        {
            // The field name is everything after the FIELD prefix on this line.
            var prefixLength = SimpleTextFieldsWriter.FIELD.Length;
            var fieldName = System.Text.Encoding.UTF8.GetString(
                scratch.Bytes, scratch.Offset + prefixLength, scratch.Length - prefixLength);
            fields[fieldName] = input.FilePointer;
        }
    }
}
/// <summary>
/// Reads the live-docs file of the given segment generation and returns it as a
/// <c>SimpleTextBits</c>. The file is expected to start with a <c>SIZE</c> line, followed by
/// one <c>DOC</c> line per set bit, an <c>END</c> line and a footer.
/// </summary>
/// <param name="dir">Directory the live-docs file is opened from.</param>
/// <param name="info">Segment commit info; its name and deletion generation form the file name.</param>
/// <param name="context">IO context passed to the directory when opening the file.</param>
/// <returns>The bits read from the file, wrapped together with their size.</returns>
public override Bits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
{
    Debug.Assert(info.HasDeletions());
    var lineBuffer = new BytesRef();
    var charBuffer = new CharsRef();
    var name = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);

    ChecksumIndexInput input = null;
    var ok = false;
    try
    {
        input = dir.OpenChecksumInput(name, context);

        // Header line: total number of bits.
        SimpleTextUtil.ReadLine(input, lineBuffer);
        Debug.Assert(StringHelper.StartsWith(lineBuffer, SIZE));
        var size = ParseIntAt(lineBuffer, SIZE.Length, charBuffer);
        var bits = new BitArray(size);

        // Read lines until END; every line before it names one set bit.
        while (true)
        {
            SimpleTextUtil.ReadLine(input, lineBuffer);
            if (lineBuffer.Equals(END))
            {
                break;
            }

            Debug.Assert(StringHelper.StartsWith(lineBuffer, DOC));
            var docid = ParseIntAt(lineBuffer, DOC.Length, charBuffer);
            bits.SafeSet(docid, true);
        }

        SimpleTextUtil.CheckFooter(input);
        ok = true;
        return new SimpleTextBits(bits, size);
    }
    finally
    {
        // On failure, close without letting a close error hide the original exception.
        if (!ok)
        {
            IOUtils.CloseWhileHandlingException(input);
        }
        else
        {
            IOUtils.Close(input);
        }
    }
}
/// <summary>
/// Re-reads a clone of <c>DATA</c> from position 0 through a checksumming input, line by
/// line, until the <c>END</c> marker is found, then checks the footer.
/// </summary>
public override void CheckIntegrity()
{
    var lineBuffer = new BytesRef();
    var dataCopy = (IndexInput) DATA.Clone();
    dataCopy.Seek(0);
    ChecksumIndexInput input = new BufferedChecksumIndexInput(dataCopy);

    for (;;)
    {
        SimpleTextUtil.ReadLine(input, lineBuffer);
        if (lineBuffer.Equals(SimpleTextDocValuesWriter.END))
        {
            // END reached: the footer follows directly.
            SimpleTextUtil.CheckFooter(input);
            return;
        }
    }
}
/// <summary>
/// Reads this field's section of the postings input, starting at <c>_termsStart</c>, and
/// builds an FST that maps each term to (file pointer after its TERM line,
/// (docFreq, totalTermFreq)). While scanning it also accumulates <c>_sumDocFreq</c>,
/// <c>_sumTotalTermFreq</c>, <c>_termCount</c> and finally sets <c>_docCount</c> to the number
/// of distinct doc ids seen. Scanning stops at an <c>END</c> line or the next <c>FIELD</c> line.
/// </summary>
private void LoadTerms()
{
    var positiveInts = PositiveIntOutputs.Singleton;
    var statsOutputs = new PairOutputs<long?, long?>(positiveInts, positiveInts);
    var termOutputs = new PairOutputs<long?, PairOutputs<long?, long?>.Pair>(positiveInts, statsOutputs);
    var builder = new Builder<PairOutputs<long?, PairOutputs<long?, long?>.Pair>.Pair>(FST.INPUT_TYPE.BYTE1, termOutputs);

    var termsInput = (IndexInput)_outerInstance._input.Clone();
    termsInput.Seek(_termsStart);

    var currentTerm = new BytesRef(10);
    long docsStart = -1; // -1 until the first TERM line has been seen
    int termDocFreq = 0;
    long termTotalFreq = 0;
    var seenDocs = new FixedBitSet(_maxDoc);
    var intsScratch = new IntsRef();

    for (;;)
    {
        SimpleTextUtil.ReadLine(termsInput, _scratch);

        bool fieldFinished = _scratch.Equals(SimpleTextFieldsWriter.END)
                             || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD);
        if (fieldFinished)
        {
            // Flush the term that was still being accumulated, if any.
            if (docsStart != -1)
            {
                builder.Add(Util.ToIntsRef(currentTerm, intsScratch),
                    termOutputs.NewPair(docsStart, statsOutputs.NewPair(termDocFreq, termTotalFreq)));
                _sumTotalTermFreq += termTotalFreq;
            }
            break;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            termDocFreq++;
            _sumDocFreq++;
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
                _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
            int docId = ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            seenDocs.Set(docId);
            continue;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
            termTotalFreq += ArrayUtil.ParseInt(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            continue;
        }

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
        {
            // A new term starts: emit the previous one before resetting the counters.
            if (docsStart != -1)
            {
                builder.Add(Util.ToIntsRef(currentTerm, intsScratch),
                    termOutputs.NewPair(docsStart, statsOutputs.NewPair(termDocFreq, termTotalFreq)));
            }

            docsStart = termsInput.FilePointer;

            int termLength = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
            if (termLength > currentTerm.Length)
            {
                currentTerm.Grow(termLength);
            }
            Array.Copy(_scratch.Bytes, SimpleTextFieldsWriter.TERM.Length, currentTerm.Bytes, 0, termLength);
            currentTerm.Length = termLength;

            termDocFreq = 0;
            _sumTotalTermFreq += termTotalFreq;
            termTotalFreq = 0;
            _termCount++;
        }
    }

    _docCount = seenDocs.Cardinality();
    _fst = builder.Finish();
}
/// <summary>
/// Creates an enum over the terms of <paramref name="outerInstance"/> driven by the run
/// automaton of <paramref name="compiled"/>. The state stack starts with a single state for
/// the automaton's initial state. When <paramref name="startTerm"/> is non-null, the
/// constructor walks its bytes label by label, using the field's skip lists where available,
/// and leaves <c>termOrd</c> / <c>stateUpto</c> positioned for the subsequent enumeration.
/// </summary>
/// <param name="outerInstance">Field whose terms, term bytes/offsets and skip data are read.</param>
/// <param name="compiled">Compiled automaton; its run automaton and sorted transitions are used.</param>
/// <param name="startTerm">Optional term to position relative to; may be <c>null</c>.</param>
// NOTE(review): this is unedited Java-to-C# converter output (lower-case members such as
// startTerm.length, runAutomaton.step, grow()); confirm these names exist on the C# types.
public DirectIntersectTermsEnum(DirectPostingsFormat.DirectField outerInstance, CompiledAutomaton compiled, BytesRef startTerm)
{
    this.outerInstance = outerInstance;
    runAutomaton = compiled.runAutomaton;
    compiledAutomaton = compiled;
    termOrd = -1;

    // Root state: covers all terms (changeOrd == number of terms) and starts at the initial automaton state.
    states = new State[1];
    states[0] = new State(this);
    states[0].changeOrd = outerInstance.terms.Length;
    states[0].state = runAutomaton.InitialState;
    states[0].transitions = compiledAutomaton.sortedTransitions[states[0].state];
    states[0].transitionUpto = -1;
    states[0].transitionMax = -1;

    if (startTerm != null)
    {
        // Number of states pushed for the term currently being examined.
        int skipUpto = 0;
        if (startTerm.length == 0)
        {
            // Empty start term: select term 0 only when that term itself has length 0.
            if (outerInstance.terms.Length > 0 && outerInstance.termOffsets[1] == 0)
            {
                termOrd = 0;
            }
        }
        else
        {
            termOrd++;

            for (int i = 0; i < startTerm.length; i++)
            {
                // Unsigned value of the i-th byte of startTerm.
                int label = startTerm.bytes[startTerm.offset + i] & 0xFF;

                // Advance this state's transition cursor until its range reaches the label.
                while (label > states[i].transitionMax)
                {
                    states[i].transitionUpto++;
                    Debug.Assert(states[i].transitionUpto < states[i].transitions.Length);
                    states[i].transitionMin = states[i].transitions[states[i].transitionUpto].Min;
                    states[i].transitionMax = states[i].transitions[states[i].transitionUpto].Max;
                    Debug.Assert(states[i].transitionMin >= 0);
                    Debug.Assert(states[i].transitionMin <= 255);
                    Debug.Assert(states[i].transitionMax >= 0);
                    Debug.Assert(states[i].transitionMax <= 255);
                }

                // Skip forwards until we find a term matching
                // the label at this position:
                while (termOrd < outerInstance.terms.Length)
                {
                    int skipOffset = outerInstance.skipOffsets[termOrd];
                    int numSkips = outerInstance.skipOffsets[termOrd + 1] - skipOffset;
                    int termOffset = outerInstance.termOffsets[termOrd];
                    int termLength = outerInstance.termOffsets[1 + termOrd] - termOffset;

                    if (termOrd == states[stateUpto].changeOrd)
                    {
                        // Reached the ord at which the current state changes: pop it and stop.
                        stateUpto--;
                        termOrd--;
                        return;
                    }

                    if (termLength == i)
                    {
                        // Term too short to have a byte at position i; try the next term.
                        termOrd++;
                        skipUpto = 0;
                    }
                    else if (label < (outerInstance.termBytes[termOffset + i] & 0xFF))
                    {
                        // No match and this term is already beyond startTerm: back up and stop.
                        termOrd--;
                        stateUpto -= skipUpto;
                        Debug.Assert(stateUpto >= 0);
                        return;
                    }
                    else if (label == (outerInstance.termBytes[termOffset + i] & 0xFF))
                    {
                        // The byte at position i matches.
                        if (skipUpto < numSkips)
                        {
                            grow();

                            int nextState = runAutomaton.step(states[stateUpto].state, label);

                            // Automaton is required to accept startTerm:
                            Debug.Assert(nextState != -1);

                            // Push a new state for the next position, bounded by this term's skip entry.
                            stateUpto++;
                            states[stateUpto].changeOrd = outerInstance.skips[skipOffset + skipUpto++];
                            states[stateUpto].state = nextState;
                            states[stateUpto].transitions = compiledAutomaton.sortedTransitions[nextState];
                            states[stateUpto].transitionUpto = -1;
                            states[stateUpto].transitionMax = -1;

                            // Match next label at this same term:
                            goto nextLabelContinue;
                        }
                        else
                        {
                            // Index exhausted: just scan now (the
                            // number of scans required will be less
                            // than the minSkipCount):
                            int startTermOrd = termOrd;
                            while (termOrd < outerInstance.terms.Length && outerInstance.compare(termOrd, startTerm) <= 0)
                            {
                                Debug.Assert(termOrd == startTermOrd || outerInstance.skipOffsets[termOrd] == outerInstance.skipOffsets[termOrd + 1]);
                                termOrd++;
                            }
                            Debug.Assert(termOrd - startTermOrd < outerInstance.minSkipCount);
                            termOrd--;
                            stateUpto -= skipUpto;
                            return;
                        }
                    }
                    else
                    {
                        // No match (label is greater): jump via the skip entry when one exists, otherwise step to the next term.
                        if (skipUpto < numSkips)
                        {
                            termOrd = outerInstance.skips[skipOffset + skipUpto];
                        }
                        else
                        {
                            termOrd++;
                        }
                        skipUpto = 0;
                    }
                }

                // startTerm is >= last term so enum will not
                // return any terms:
                termOrd--;
                return;

                // Target of the goto above: continues the for loop with the next label.
                nextLabelContinue: ;
            }
            // Counterpart of the Java labeled break; nothing in this constructor jumps here.
            nextLabelBreak: ;
        }

        // NOTE(review): termOffsets is indexed before the termOrd >= 0 check below, so termOrd == -1
        // (e.g. an empty startTerm with no matching empty term) would presumably index out of range — confirm.
        // NOTE(review): these locals reuse the name termOffset declared in the nested loop above;
        // C# may reject that (CS0136) — confirm this compiles.
        int termOffset = outerInstance.termOffsets[termOrd];
        int termLen = outerInstance.termOffsets[1 + termOrd] - termOffset;

        // If the term we landed on is not exactly startTerm, undo the pushed states and step back one term.
        if (termOrd >= 0 && !startTerm.Equals(new BytesRef(outerInstance.termBytes, termOffset, termLen)))
        {
            stateUpto -= skipUpto;
            termOrd--;
        }
    }
}
/// <summary>
/// Moves the enum to the ordinal stored in <paramref name="state"/> and refreshes the
/// current term via <c>setTerm()</c>. In debug builds it asserts that
/// <paramref name="term"/> equals <c>scratch</c> afterwards.
/// </summary>
/// <param name="term">Term expected at the stored ordinal; only used by the assertion.</param>
/// <param name="state">Term state; cast to <c>OrdTermState</c> to read its <c>ord</c>.</param>
public override void seekExact(BytesRef term, TermState state)
{
    var ordState = (OrdTermState) state;
    termOrd = (int) ordState.ord;
    setTerm();
    Debug.Assert(term.Equals(scratch));
}