private void EnsureIndexIsRead() { lock (this) { if (indexTerms != null) { // index already read return; // do nothing } try { int indexSize = (int)indexEnum.size; // otherwise read index indexTerms = new Term[indexSize]; indexInfos = new TermInfo[indexSize]; indexPointers = new long[indexSize]; for (int i = 0; indexEnum.Next(); i++) { indexTerms[i] = indexEnum.Term(); indexInfos[i] = indexEnum.TermInfo(); indexPointers[i] = indexEnum.indexPointer; } } finally { indexEnum.Close(); indexEnum = null; } } }
/// <summary>Scans within block for matching term. </summary> private TermInfo ScanEnum(Term term) { SegmentTermEnum enumerator = GetEnum(); enumerator.ScanTo(term); if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0) { return(enumerator.TermInfo()); } else { return(null); } }
public virtual void Seek(TermEnum termEnum) { TermInfo ti; Term term; // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs if (termEnum is SegmentTermEnum && ((SegmentTermEnum)termEnum).fieldInfos == parent.fieldInfos) { // optimized case SegmentTermEnum segmentTermEnum = ((SegmentTermEnum)termEnum); term = segmentTermEnum.Term(); ti = segmentTermEnum.TermInfo(); } else { // punt case term = termEnum.Term(); ti = parent.tis.Get(term); } Seek(ti, term); }
internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor) { bool success = false; if (indexDivisor < 1 && indexDivisor != -1) { throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor); } try { directory = dir; segment = seg; fieldInfos = fis; origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false); size = origEnum.size; if (indexDivisor != -1) { // Load terms index totalIndexInterval = origEnum.indexInterval * indexDivisor; SegmentTermEnum indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true); try { int indexSize = 1 + ((int)indexEnum.size - 1) / indexDivisor; // otherwise read index indexTerms = new Term[indexSize]; indexInfos = new TermInfo[indexSize]; indexPointers = new long[indexSize]; for (int i = 0; indexEnum.Next(); i++) { indexTerms[i] = indexEnum.Term(); indexInfos[i] = indexEnum.TermInfo(); indexPointers[i] = indexEnum.indexPointer; for (int j = 1; j < indexDivisor; j++) { if (!indexEnum.Next()) { break; } } } } finally { indexEnum.Close(); } } else { // Do not load terms index: totalIndexInterval = -1; indexTerms = null; indexInfos = null; indexPointers = null; } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } }
/// <summary>Returns the TermInfo for a Term in the set, or null. </summary> private TermInfo Get(Term term, bool useCache) { if (size == 0) { return(null); } EnsureIndexIsRead(); TermInfo ti; ThreadResources resources = GetThreadResources(); Lucene.Net.Util.Cache.Cache cache = null; if (useCache) { cache = resources.termInfoCache; // check the cache first if the term was recently looked up ti = (TermInfo)cache.Get(term); if (ti != null) { return(ti); } } // optimize sequential access: first try scanning cached enum w/o seeking SegmentTermEnum enumerator = resources.termEnum; if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0)) { int enumOffset = (int)(enumerator.position / totalIndexInterval) + 1; if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0) { // no need to seek int numScans = enumerator.ScanTo(term); if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0) { ti = enumerator.TermInfo(); if (cache != null && numScans > 1) { // we only want to put this TermInfo into the cache if // scanEnum skipped more than one dictionary entry. // This prevents RangeQueries or WildcardQueries to // wipe out the cache when they iterate over a large numbers // of terms in order cache.Put(term, ti); } } else { ti = null; } return(ti); } } // random-access: must seek SeekEnum(enumerator, GetIndexOffset(term)); enumerator.ScanTo(term); if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0) { ti = enumerator.TermInfo(); if (cache != null) { cache.Put(term, ti); } } else { ti = null; } return(ti); }
internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor) { bool success = false; if (indexDivisor < 1 && indexDivisor != - 1) { throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor); } try { directory = dir; segment = seg; fieldInfos = fis; origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false); size = origEnum.size; if (indexDivisor != - 1) { // Load terms index totalIndexInterval = origEnum.indexInterval * indexDivisor; var indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true); try { int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index indexTerms = new Term[indexSize]; indexInfos = new TermInfo[indexSize]; indexPointers = new long[indexSize]; for (int i = 0; indexEnum.Next(); i++) { indexTerms[i] = indexEnum.Term; indexInfos[i] = indexEnum.TermInfo(); indexPointers[i] = indexEnum.indexPointer; for (int j = 1; j < indexDivisor; j++) if (!indexEnum.Next()) break; } } finally { indexEnum.Close(); } } else { // Do not load terms index: totalIndexInterval = - 1; indexTerms = null; indexInfos = null; indexPointers = null; } success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Dispose(); } } }
// FIXME: OG: remove hard-coded file names public static void Test() { System.IO.FileInfo file = new System.IO.FileInfo("words.txt"); System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes"); System.DateTime start = System.DateTime.Now; System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); System.IO.FileStream ws = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read); System.IO.StreamReader wr = new System.IO.StreamReader(new System.IO.StreamReader(ws, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(ws, System.Text.Encoding.Default).CurrentEncoding); for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine()) { keys.Add(new Term("word", key)); } wr.Close(); System.DateTime end = System.DateTime.Now; System.Console.Out.Write(end.Ticks - start.Ticks); System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words"); start = System.DateTime.Now; System.Random gen = new System.Random((System.Int32) 1251971); long fp = (gen.Next() & 0xF) + 1; long pp = (gen.Next() & 0xF) + 1; int[] docFreqs = new int[keys.Count]; long[] freqPointers = new long[keys.Count]; long[] proxPointers = new long[keys.Count]; for (int i = 0; i < keys.Count; i++) { docFreqs[i] = (gen.Next() & 0xF) + 1; freqPointers[i] = fp; proxPointers[i] = pp; fp += (gen.Next() & 0xF) + 1; ; pp += (gen.Next() & 0xF) + 1; ; } end = System.DateTime.Now; System.Console.Out.Write(end.Ticks - start.Ticks); System.Console.Out.WriteLine(" milliseconds to generate values"); start = System.DateTime.Now; Directory store = FSDirectory.GetDirectory("test.store", true); FieldInfos fis = new FieldInfos(); TermInfosWriter writer = new TermInfosWriter(store, "words", fis); fis.Add("word", false); for (int i = 0; i < keys.Count; i++) { writer.Add((Term)keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i])); } writer.Close(); end = System.DateTime.Now; System.Console.Out.Write(end.Ticks - start.Ticks); System.Console.Out.WriteLine(" milliseconds to write table"); System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes"); start = System.DateTime.Now; TermInfosReader reader = new TermInfosReader(store, "words", fis); end = System.DateTime.Now; System.Console.Out.Write(end.Ticks - start.Ticks); System.Console.Out.WriteLine(" milliseconds to open table"); start = System.DateTime.Now; SegmentTermEnum enumerator = reader.Terms(); for (int i = 0; i < keys.Count; i++) { enumerator.Next(); Term key = (Term)keys[i]; if (!key.Equals(enumerator.Term())) { throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i); } TermInfo ti = enumerator.TermInfo(); if (ti.docFreq != docFreqs[i]) { throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i); } if (ti.freqPointer != freqPointers[i]) { throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i); } if (ti.proxPointer != proxPointers[i]) { throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i); } } end = System.DateTime.Now; System.Console.Out.Write(end.Ticks - start.Ticks); System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words"); start = System.DateTime.Now; for (int i = 0; i < keys.Count; i++) { Term key = (Term)keys[i]; TermInfo ti = reader.Get(key); if (ti.docFreq != docFreqs[i]) { throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i); } if (ti.freqPointer != freqPointers[i]) { throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i); } if (ti.proxPointer != proxPointers[i]) { throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i); } } end = System.DateTime.Now; System.Console.Out.Write((end.Ticks - start.Ticks) / (float)keys.Count); System.Console.Out.WriteLine(" average milliseconds per lookup"); TermEnum e = reader.Terms(new Term("word", "azz")); System.Console.Out.WriteLine("Word after azz is " + e.Term().text); reader.Close(); store.Close(); }