/// <summary>
/// Creates a term-docs enumerator over the given frequency stream.
/// The stream is cloned so this instance owns an independent read position;
/// skip-list parameters are copied from the supplied <c>TermInfosReader</c>.
/// </summary>
public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos)
{
    // Clone so our seek position never disturbs the caller's stream.
    this.FreqStream = (IndexInput)freqStream.Clone();
    this.FieldInfos = fieldInfos;
    this.Tis = tis;
    SkipInterval = tis.SkipInterval;
    MaxSkipLevels = tis.MaxSkipLevels;
}
/// <summary>
/// Opens the legacy (Lucene 3.x) postings for <paramref name="info"/>: builds a
/// <c>TermInfosReader</c>, opens the frequency stream, registers every indexed
/// field, and opens the proximity stream only if some field stores positions.
/// If anything fails mid-way, the <c>finally</c> block disposes whatever was
/// already opened.
/// </summary>
public Lucene3xFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, IOContext context, int indexDivisor)
{
    Si = info;

    // NOTE: we must always load terms index, even for
    // "sequential" scan during merging, because what is
    // sequential to merger may not be to TermInfosReader
    // since we do the surrogates dance:
    if (indexDivisor < 0)
    {
        indexDivisor = -indexDivisor;
    }

    // Track success so the finally block can clean up partially-opened state.
    bool success = false;
    try
    {
        TermInfosReader r = new TermInfosReader(dir, info.Name, fieldInfos, context, indexDivisor);
        // NOTE(review): after the negation above, indexDivisor can never be -1
        // here, so the TisNoIndex branch looks unreachable; this mirrors the
        // upstream Lucene code — confirm before changing.
        if (indexDivisor == -1)
        {
            TisNoIndex = r;
        }
        else
        {
            TisNoIndex = null;
            Tis = r;
        }
        this.Context = context;
        this.FieldInfos = fieldInfos;

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        FreqStream = dir.OpenInput(IndexFileNames.SegmentFileName(info.Name, "", Lucene3xPostingsFormat.FREQ_EXTENSION), context);

        // Register each indexed field and detect whether any field stores
        // positions (which requires the .prx proximity stream).
        bool anyProx = false;
        foreach (FieldInfo fi in fieldInfos)
        {
            if (fi.Indexed)
            {
                Fields[fi.Name] = fi;
                PreTerms_[fi.Name] = new PreTerms(this, fi);
                if (fi.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                {
                    anyProx = true;
                }
            }
        }

        if (anyProx)
        {
            ProxStream = dir.OpenInput(IndexFileNames.SegmentFileName(info.Name, "", Lucene3xPostingsFormat.PROX_EXTENSION), context);
        }
        else
        {
            ProxStream = null;
        }

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Dispose();
        }
    }
    this.Dir = dir;
}
//private Directory cfsReader; // LUCENENET NOTE: cfsReader not used

/// <summary>
/// Opens the legacy (Lucene 3.x) postings for <paramref name="info"/>: builds a
/// <c>TermInfosReader</c>, opens the frequency stream, registers every indexed
/// field, and opens the proximity stream only if some field stores positions.
/// If anything fails mid-way, the <c>finally</c> block disposes whatever was
/// already opened.
/// </summary>
public Lucene3xFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, IOContext context, int indexDivisor)
{
    si = info;

    // NOTE: we must always load terms index, even for
    // "sequential" scan during merging, because what is
    // sequential to merger may not be to TermInfosReader
    // since we do the surrogates dance:
    if (indexDivisor < 0)
    {
        indexDivisor = -indexDivisor;
    }

    // Track success so the finally block can clean up partially-opened state.
    bool success = false;
    try
    {
        var r = new TermInfosReader(dir, info.Name, fieldInfos, context, indexDivisor);
        // NOTE(review): after the negation above, indexDivisor can never be -1
        // here, so the TisNoIndex branch looks unreachable; this mirrors the
        // upstream Lucene code — confirm before changing.
        if (indexDivisor == -1)
        {
            TisNoIndex = r;
        }
        else
        {
            TisNoIndex = null;
            Tis = r;
        }
        this.context = context;
        this.fieldInfos = fieldInfos;

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        FreqStream = dir.OpenInput(IndexFileNames.SegmentFileName(info.Name, "", Lucene3xPostingsFormat.FREQ_EXTENSION), context);

        // Register each indexed field and detect whether any field stores
        // positions (which requires the .prx proximity stream).
        bool anyProx = false;
        foreach (FieldInfo fi in fieldInfos)
        {
            if (fi.IsIndexed)
            {
                fields[fi.Name] = fi;
                preTerms[fi.Name] = new PreTerms(this, fi);
                if (fi.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                {
                    anyProx = true;
                }
            }
        }

        if (anyProx)
        {
            ProxStream = dir.OpenInput(IndexFileNames.SegmentFileName(info.Name, "", Lucene3xPostingsFormat.PROX_EXTENSION), context);
        }
        else
        {
            ProxStream = null;
        }

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Dispose();
        }
    }
    this.dir = dir;
}
/// <summary>
/// Seeks to the smallest term >= <paramref name="term"/> in this field's
/// legacy (UTF-16-ordered) term dictionary, compensating for the ordering
/// difference with UTF-8 via the "surrogates dance". Returns FOUND on an
/// exact match, NOT_FOUND when positioned on a later term, and END when the
/// enumerator is exhausted for this field.
/// </summary>
public override SeekStatus SeekCeil(BytesRef term)
{
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
    }
    skipNext = false;
    TermInfosReader tis = outerInstance.TermsDict;
    Term t0 = new Term(fieldInfo.Name, term);

    Debug.Assert(termEnum != null);

    tis.SeekEnum(termEnum, t0, false);

    Term t = termEnum.Term();

    // NOTE(review): the == / != comparisons against internedFieldName below
    // rely on the preflex codec interning field names (reference equality) —
    // see the assert in the non-exact branch.
    if (t != null && t.Field == internedFieldName && term.BytesEquals(t.Bytes))
    {
        // If we found an exact match, no need to do the
        // surrogate dance
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine("  seek exact match");
        }
        current = t.Bytes;
        return (SeekStatus.FOUND);
    }
    else if (t == null || t.Field != internedFieldName)
    {
        // TODO: maybe we can handle this like the next()
        // into null?  set term as prevTerm then dance?

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine("  seek hit EOF");
        }

        // We hit EOF; try end-case surrogate dance: if we
        // find an E, try swapping in S, backwards:
        scratchTerm.CopyBytes(term);

        Debug.Assert(scratchTerm.Offset == 0);

        // Walk backwards looking for a high-BMP char whose non-BMP
        // (surrogate) counterpart may still sort before EOF.
        for (int i = scratchTerm.Length - 1; i >= 0; i--)
        {
            if (IsHighBMPChar(scratchTerm.Bytes, i))
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("    found E pos=" + i + "; try seek");
                }

                if (SeekToNonBMP(seekTermEnum, scratchTerm, i))
                {
                    scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);
                    outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), false);

                    newSuffixStart = 1 + i;

                    DoPushes();

                    // Found a match
                    // TODO: faster seek?
                    current = termEnum.Term().Bytes;
                    return (SeekStatus.NOT_FOUND);
                }
            }
        }

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine("  seek END");
        }

        current = null;
        return (SeekStatus.END);
    }
    else
    {
        // We found a non-exact but non-null term; this one
        // is fun -- just treat it like next, by pretending
        // requested term was prev:
        prevTerm.CopyBytes(term);

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine("  seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text()));
        }

        BytesRef br = t.Bytes;
        Debug.Assert(br.Offset == 0);

        SetNewSuffixStart(term, br);

        SurrogateDance();

        Term t2 = termEnum.Term();
        if (t2 == null || t2.Field != internedFieldName)
        {
            // PreFlex codec interns field names; verify:
            Debug.Assert(t2 == null || !t2.Field.Equals(internedFieldName, StringComparison.Ordinal));
            current = null;
            return (SeekStatus.END);
        }
        else
        {
            current = t2.Bytes;
            Debug.Assert(!unicodeSortOrder || term.CompareTo(current) < 0, "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(current.Utf8ToString()));
            return (SeekStatus.NOT_FOUND);
        }
    }
}
/// <summary>
/// Creates a term-positions enumerator on top of the term-docs base.
/// Only the original prox stream is kept here; it is cloned lazily the
/// first time <c>NextPosition()</c> is called.
/// </summary>
public SegmentTermPositions(IndexInput freqStream, IndexInput proxStream, TermInfosReader tis, FieldInfos fieldInfos)
    : base(freqStream, tis, fieldInfos)
{
    // Defer cloning until NextPosition() actually needs the prox data.
    this.proxStreamOrig = proxStream;
}