Exemplo n.º 1
0
        /// <summary>Returns the position of a Term in the set or -1. </summary>
        /// <param name="term">The term whose position is wanted.</param>
        /// <returns>
        /// The enumerator position at which <paramref name="term"/> was found, or -1 when the
        /// set is empty or the term is not present.
        /// </returns>
        internal long GetPosition(Term term)
        {
            if (size == 0)
            {
                return -1;
            }

            EnsureIndexIsRead();
            int indexOffset = GetIndexOffset(term);

            SegmentTermEnum enumerator = GetThreadResources().termEnum;

            // Start at the index entry chosen for the term, then walk forward.
            SeekEnum(enumerator, indexOffset);

            // Advance while the current term still sorts before the target and more terms remain.
            while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
            {
            }

            // The loop can also end because Next() returned false. Check the current term for
            // null before comparing so that case yields -1 instead of comparing against null.
            // NOTE(review): assumes Term() can be null once the enumeration is exhausted - confirm.
            Term current = enumerator.Term();
            if (current != null && term.CompareTo(current) == 0)
            {
                return enumerator.position;
            }

            return -1;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Opens the ".tis" and ".tii" enumerators of a segment and records the term count and
        /// index interval reported by the ".tis" enumerator.
        /// </summary>
        /// <param name="dir">Directory the segment files are opened from.</param>
        /// <param name="seg">Segment name, used as the file-name prefix.</param>
        /// <param name="fis">Field infos handed to both enumerators.</param>
        /// <param name="readBufferSize">Buffer size passed to <c>OpenInput</c>.</param>
        internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize)
        {
            bool opened = false;

            try
            {
                directory = dir;
                segment = seg;
                fieldInfos = fis;

                System.String termsFile = segment + ".tis";
                origEnum = new SegmentTermEnum(directory.OpenInput(termsFile, readBufferSize), fieldInfos, false);
                size = origEnum.size;
                totalIndexInterval = origEnum.indexInterval;

                System.String termsIndexFile = segment + ".tii";
                indexEnum = new SegmentTermEnum(directory.OpenInput(termsIndexFile, readBufferSize), fieldInfos, true);

                opened = true;
            }
            finally
            {
                // With lock-less commits a FileNotFound exception above is possible and fine.
                // Close whatever was opened right away rather than waiting for a GC to do it.
                if (!opened)
                {
                    Close();
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Opens the terms and terms-index enumerators of a segment, using the extensions defined
        /// in <c>IndexFileNames</c>, and records the term count and index interval.
        /// </summary>
        /// <param name="dir">Directory the segment files are opened from.</param>
        /// <param name="seg">Segment name, used as the file-name prefix.</param>
        /// <param name="fis">Field infos handed to both enumerators.</param>
        /// <param name="readBufferSize">Buffer size passed to <c>OpenInput</c>.</param>
        internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize)
        {
            bool fullyConstructed = false;

            try
            {
                directory = dir;
                segment = seg;
                fieldInfos = fis;

                System.String termsFileName = segment + "." + IndexFileNames.TERMS_EXTENSION;
                origEnum = new SegmentTermEnum(directory.OpenInput(termsFileName, readBufferSize), fieldInfos, false);
                size = origEnum.size;
                totalIndexInterval = origEnum.indexInterval;

                System.String indexFileName = segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION;
                indexEnum = new SegmentTermEnum(directory.OpenInput(indexFileName, readBufferSize), fieldInfos, true);

                fullyConstructed = true;
            }
            finally
            {
                // A FileNotFound exception is possible (and fine) with lock-less commits.
                // Release any inputs that were opened instead of leaving them to the GC.
                if (!fullyConstructed)
                {
                    Close();
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
        /// <param name="term">The term to look up.</param>
        /// <returns>The result of scanning for <paramref name="term"/>, or null when the set is empty.</returns>
        public TermInfo Get(Term term)
        {
            if (size == 0)
            {
                return null;
            }

            EnsureIndexIsRead();

            // Sequential-access fast path: try to reuse the cached enumerator without seeking.
            SegmentTermEnum cached = GetEnum();

            if (cached.Term() != null)
            {
                bool afterPreviousTerm = cached.Prev() != null && term.CompareTo(cached.Prev()) > 0;
                if (afterPreviousTerm || term.CompareTo(cached.Term()) >= 0)
                {
                    // Index slot that follows the block the cached enumerator is currently in.
                    int nextIndexSlot = (int)(cached.position / totalIndexInterval) + 1;
                    bool beforeNextIndexTerm = indexTerms.Length == nextIndexSlot || term.CompareTo(indexTerms[nextIndexSlot]) < 0;
                    if (beforeNextIndexTerm)
                    {
                        // The target lies in the current block: scan forward, no seek needed.
                        return ScanEnum(term);
                    }
                }
            }

            // Random access: seek to the index entry for the term, then scan.
            SeekEnum(GetIndexOffset(term));
            return ScanEnum(term);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Reads all entries of <c>indexEnum</c> into the index arrays the first time it is
        /// called, then closes and clears <c>indexEnum</c>. Calls made while <c>indexTerms</c>
        /// is already set do nothing.
        /// </summary>
        private void  EnsureIndexIsRead()
        {
            lock (this)
            {
                // Only read the index when it has not been read yet.
                if (indexTerms == null)
                {
                    try
                    {
                        int entryCount = (int)indexEnum.size;

                        indexTerms = new Term[entryCount];
                        indexInfos = new TermInfo[entryCount];
                        indexPointers = new long[entryCount];

                        int slot = 0;
                        while (indexEnum.Next())
                        {
                            indexTerms[slot] = indexEnum.Term();
                            indexInfos[slot] = indexEnum.TermInfo();
                            indexPointers[slot] = indexEnum.indexPointer;
                            slot++;
                        }
                    }
                    finally
                    {
                        // The index enumerator is released whether or not the read succeeded.
                        indexEnum.Close();
                        indexEnum = null;
                    }
                }
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Returns the enumerator stored in the <c>enumerators</c> thread data slot for the
        /// calling thread, creating it with <c>Terms()</c> and storing it when the slot is empty.
        /// </summary>
        private SegmentTermEnum GetEnum()
        {
            object stored = System.Threading.Thread.GetData(enumerators);
            if (stored != null)
            {
                return (SegmentTermEnum)stored;
            }

            // Nothing stored for this thread yet: create an enumerator and remember it.
            SegmentTermEnum created = Terms();
            System.Threading.Thread.SetData(enumerators, created);
            return created;
        }
Exemplo n.º 7
0
		/// <summary>
		/// Opens the ".tis" and ".tii" enumerators of a segment and records the term count
		/// reported by the ".tis" enumerator.
		/// </summary>
		/// <param name="dir">Directory the segment files are opened from.</param>
		/// <param name="seg">Segment name, used as the file-name prefix.</param>
		/// <param name="fis">Field infos handed to both enumerators.</param>
		public /*internal*/ TermInfosReader(Directory dir, System.String seg, FieldInfos fis)
		{
			directory = dir;
			segment = seg;
			fieldInfos = fis;
			
			origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis"), fieldInfos, false);
			size = origEnum.size;
			
			// If the ".tii" enumerator cannot be created, close the ".tis" enumerator that is
			// already open so it is released immediately; the exception still propagates.
			bool success = false;
			try
			{
				indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii"), fieldInfos, true);
				success = true;
			}
			finally
			{
				if (!success)
				{
					origEnum.Close();
				}
			}
		}
Exemplo n.º 8
0
        /// <summary>
        /// Opens the ".tis" and ".tii" enumerators of a segment and records the term count
        /// reported by the ".tis" enumerator.
        /// </summary>
        /// <param name="dir">Directory the segment files are opened from.</param>
        /// <param name="seg">Segment name, used as the file-name prefix.</param>
        /// <param name="fis">Field infos handed to both enumerators.</param>
        public TermInfosReader(Directory dir, System.String seg, FieldInfos fis)
        {
            directory  = dir;
            segment    = seg;
            fieldInfos = fis;

            origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis"), fieldInfos, false);
            size     = origEnum.size;

            // If the ".tii" enumerator cannot be created, close the ".tis" enumerator that is
            // already open so it is released immediately; the exception still propagates.
            bool success = false;
            try
            {
                indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii"), fieldInfos, true);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    origEnum.Close();
                }
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Advances <paramref name="enumerator"/> until its position is no longer below
        /// <paramref name="position"/> and returns the term it is then on.
        /// </summary>
        /// <param name="enumerator">Enumerator to advance.</param>
        /// <param name="position">Target position.</param>
        /// <returns>The enumerator's current term, or null if <c>Next()</c> returned false first.</returns>
        private Term ScanEnum(SegmentTermEnum enumerator, int position)
        {
            bool advanced = true;

            // Stop as soon as the target is reached or the enumerator cannot advance any further.
            while (advanced && enumerator.position < position)
            {
                advanced = enumerator.Next();
            }

            return advanced ? enumerator.Term() : null;
        }
Exemplo n.º 10
0
        /// <summary>Scans within block for matching term. </summary>
        /// <param name="term">The term to scan for.</param>
        /// <returns>
        /// The enumerator's TermInfo when, after scanning, its current term compares equal to
        /// <paramref name="term"/>; otherwise null.
        /// </returns>
        private TermInfo ScanEnum(Term term)
        {
            SegmentTermEnum cursor = GetEnum();
            cursor.ScanTo(term);

            // A match requires a current term that compares equal to the requested one.
            bool found = cursor.Term() != null && term.CompareTo(cursor.Term()) == 0;
            return found ? cursor.TermInfo() : null;
        }
Exemplo n.º 11
0
        /// <summary>Returns the nth term in the set. </summary>
        /// <param name="position">Zero-based position of the wanted term.</param>
        /// <returns>The term found by scanning to <paramref name="position"/>, or null when the set is empty.</returns>
        internal Term Get(int position)
        {
            if (size == 0)
            {
                return null;
            }

            SegmentTermEnum cursor = GetEnum();

            // The seek can be skipped when the target lies at or after the enumerator's current
            // position and less than one index interval ahead of it.
            bool reachableByScan = cursor != null
                && cursor.Term() != null
                && position >= cursor.position
                && position < (cursor.position + cursor.indexInterval);

            if (!reachableByScan)
            {
                SeekEnum(position / cursor.indexInterval);
            }

            return ScanEnum(position);
        }
Exemplo n.º 12
0
        /// <summary>Returns the nth term in the set. </summary>
        /// <param name="position">Zero-based position of the wanted term.</param>
        /// <returns>The term found by scanning to <paramref name="position"/>, or null when the set is empty.</returns>
        internal Term Get(int position)
        {
            if (size == 0)
            {
                return null;
            }

            SegmentTermEnum cursor = GetThreadResources().termEnum;

            // No seek is needed when the target is at or after the current position and less
            // than totalIndexInterval entries ahead of it.
            bool withinCurrentBlock = cursor != null
                && cursor.Term() != null
                && position >= cursor.position
                && position < (cursor.position + totalIndexInterval);

            if (!withinCurrentBlock)
            {
                SeekEnum(cursor, position / totalIndexInterval);
            }

            return ScanEnum(cursor, position);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Indexes one document containing "aaa bbb" and checks that Prev() reports the
        /// preceding term after each step, including after Next() has returned false.
        /// </summary>
        public virtual void  TestPrevTermAtEnd()
        {
            // Build a one-document index.
            Directory ramDir = new MockRAMDirectory();
            IndexWriter indexWriter = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            AddDoc(indexWriter, "aaa bbb");
            indexWriter.Close();

            SegmentReader segmentReader = SegmentReader.GetOnlySegmentReader(ramDir);
            SegmentTermEnum terms = (SegmentTermEnum)segmentReader.Terms();

            // First term.
            Assert.IsTrue(terms.Next());
            Assert.AreEqual("aaa", terms.Term.Text);

            // Second term: Prev() is now the first one.
            Assert.IsTrue(terms.Next());
            Assert.AreEqual("aaa", terms.Prev().Text);
            Assert.AreEqual("bbb", terms.Term.Text);

            // Past the end: Prev() is the last term.
            Assert.IsFalse(terms.Next());
            Assert.AreEqual("bbb", terms.Prev().Text);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Closes the enumerator stored for the calling thread (and clears its thread data
        /// slot), then the original and index enumerators; null ones are skipped.
        /// </summary>
        public /*internal*/ void  Close()
        {
            object slotValue = System.Threading.Thread.GetData(enumerators);
            SegmentTermEnum perThreadEnum = (SegmentTermEnum)slotValue;

            if (perThreadEnum != null)
            {
                perThreadEnum.Close();
                // Clear the slot so the closed enumerator is not handed out again on this thread.
                System.Threading.Thread.SetData(enumerators, null);
            }

            if (origEnum != null)
            {
                origEnum.Close();
            }

            if (indexEnum != null)
            {
                indexEnum.Close();
            }
        }
Exemplo n.º 15
0
        /// <summary>
        /// Creates a copy of this enumerator with its own cloned input, its own copy of the
        /// current TermInfo, cloned term and previous-term buffers, and a fresh scan buffer.
        /// </summary>
        /// <returns>The cloned SegmentTermEnum.</returns>
        public object Clone()
        {
            // Start from a shallow copy. A failure here is no longer swallowed: the previous
            // empty catch left 'clone' null and failed on the next line with a misleading
            // null dereference.
            SegmentTermEnum clone = (SegmentTermEnum)base.MemberwiseClone();

            // Replace the shared references with copies owned by the clone.
            clone.input    = (IndexInput)input.Clone();
            clone.termInfo = new TermInfo(termInfo);

            clone.termBuffer = (TermBuffer)termBuffer.Clone();
            clone.prevBuffer = (TermBuffer)prevBuffer.Clone();
            clone.scanBuffer = new TermBuffer();

            return clone;
        }
Exemplo n.º 16
0
        /// <summary>
        /// Seeks to the term <paramref name="termEnum"/> is currently on. The TermInfo is taken
        /// straight from the enumerator when it is a SegmentTermEnum sharing this segment's
        /// field infos; otherwise it is looked up through <c>parent.tis</c>.
        /// </summary>
        /// <param name="termEnum">Enumerator positioned on the term to seek to.</param>
        public virtual void  Seek(TermEnum termEnum)
        {
            // Comparing field infos verifies that termEnum belongs to the same segment as this instance.
            SegmentTermEnum segmentEnum = termEnum as SegmentTermEnum;
            bool sameSegment = segmentEnum != null && segmentEnum.fieldInfos == parent.fieldInfos;

            if (sameSegment)
            {
                // Optimized case: the enumerator already holds the TermInfo.
                Term currentTerm = segmentEnum.Term();
                TermInfo currentInfo = segmentEnum.TermInfo();
                Seek(currentInfo, currentTerm);
                return;
            }

            // Punt case: look the term up in the term dictionary.
            Term foreignTerm = termEnum.Term();
            TermInfo lookedUp = parent.tis.Get(foreignTerm);
            Seek(lookedUp, foreignTerm);
        }
Exemplo n.º 17
0
        /// <summary>
        /// Opens a segment's term dictionary and, unless <paramref name="indexDivisor"/> is -1,
        /// loads every indexDivisor-th entry of its terms index into the index arrays.
        /// </summary>
        /// <param name="dir">Directory the segment files are opened from.</param>
        /// <param name="seg">Segment name, used as the file-name prefix.</param>
        /// <param name="fis">Field infos handed to the enumerators.</param>
        /// <param name="readBufferSize">Buffer size passed to <c>OpenInput</c>.</param>
        /// <param name="indexDivisor">-1 to skip loading the terms index, otherwise a value of at least 1.</param>
        /// <exception cref="System.ArgumentException">indexDivisor is neither -1 nor greater than 0.</exception>
        internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
        {
            bool initialized = false;

            bool divisorIsValid = indexDivisor >= 1 || indexDivisor == -1;
            if (!divisorIsValid)
            {
                throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
            }

            try
            {
                directory = dir;
                segment = seg;
                fieldInfos = fis;

                System.String termsFile = segment + "." + IndexFileNames.TERMS_EXTENSION;
                origEnum = new SegmentTermEnum(directory.OpenInput(termsFile, readBufferSize), fieldInfos, false);
                size = origEnum.size;

                if (indexDivisor == -1)
                {
                    // Terms index is not loaded.
                    totalIndexInterval = -1;
                    indexTerms = null;
                    indexInfos = null;
                    indexPointers = null;
                }
                else
                {
                    // Load the terms index, keeping one entry out of every indexDivisor.
                    totalIndexInterval = origEnum.indexInterval * indexDivisor;
                    System.String termsIndexFile = segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION;
                    SegmentTermEnum termsIndex = new SegmentTermEnum(directory.OpenInput(termsIndexFile, readBufferSize), fieldInfos, true);

                    try
                    {
                        int keptEntries = 1 + ((int)termsIndex.size - 1) / indexDivisor;

                        indexTerms = new Term[keptEntries];
                        indexInfos = new TermInfo[keptEntries];
                        indexPointers = new long[keptEntries];

                        int slot = 0;
                        while (termsIndex.Next())
                        {
                            indexTerms[slot] = termsIndex.Term();
                            indexInfos[slot] = termsIndex.TermInfo();
                            indexPointers[slot] = termsIndex.indexPointer;
                            slot++;

                            // Skip the next indexDivisor - 1 entries, stopping early if the enumerator runs out.
                            int consumed = 1;
                            while (consumed < indexDivisor && termsIndex.Next())
                            {
                                consumed++;
                            }
                        }
                    }
                    finally
                    {
                        termsIndex.Close();
                    }
                }

                initialized = true;
            }
            finally
            {
                // With lock-less commits a FileNotFound exception above is possible and fine.
                // Close whatever was opened right away rather than waiting for a GC to do it.
                if (!initialized)
                {
                    Close();
                }
            }
        }
Exemplo n.º 18
0
        /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
        /// <param name="term">The term to look up.</param>
        /// <param name="useCache">
        /// When true, the calling thread's <c>termInfoCache</c> is consulted before scanning and
        /// is populated with results found by scanning.
        /// </param>
        /// <returns>The TermInfo for <paramref name="term"/>, or null when the set is empty or no equal term is found.</returns>
        private TermInfo Get(Term term, bool useCache)
        {
            if (size == 0)
            {
                return(null);
            }

            EnsureIndexIsRead();

            TermInfo        ti;
            ThreadResources resources = GetThreadResources();

            // Stays null when caching is disabled, which also disables the Put calls below.
            Lucene.Net.Util.Cache.Cache cache = null;

            if (useCache)
            {
                cache = resources.termInfoCache;
                // check the cache first if the term was recently looked up
                ti = (TermInfo)cache.Get(term);
                if (ti != null)
                {
                    return(ti);
                }
            }

            // optimize sequential access: first try scanning cached enum w/o seeking
            SegmentTermEnum enumerator = resources.termEnum;

            // The enumerator must be on a term, and the target must sort after the previous term
            // or at/after the current one.
            if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
            {
                // Index slot that follows the enumerator's current position.
                int enumOffset = (int)(enumerator.position / totalIndexInterval) + 1;
                // Scan only if there is no further index term or the target sorts before it.
                if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
                {
                    // no need to seek

                    int numScans = enumerator.ScanTo(term);
                    if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
                    {
                        ti = enumerator.TermInfo();
                        if (cache != null && numScans > 1)
                        {
                            // we only  want to put this TermInfo into the cache if
                            // scanEnum skipped more than one dictionary entry.
                            // This prevents RangeQueries or WildcardQueries to
                            // wipe out the cache when they iterate over a large numbers
                            // of terms in order
                            cache.Put(term, ti);
                        }
                    }
                    else
                    {
                        ti = null;
                    }

                    return(ti);
                }
            }

            // random-access: must seek
            SeekEnum(enumerator, GetIndexOffset(term));
            enumerator.ScanTo(term);
            if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
            {
                ti = enumerator.TermInfo();
                // Results found after a seek are always cached when caching is enabled.
                if (cache != null)
                {
                    cache.Put(term, ti);
                }
            }
            else
            {
                ti = null;
            }
            return(ti);
        }
Exemplo n.º 19
0
 /// <summary>
 /// Seeks <paramref name="enumerator"/> to the loaded index entry at <paramref name="indexOffset"/>.
 /// </summary>
 /// <param name="enumerator">Enumerator to reposition.</param>
 /// <param name="indexOffset">Index into the loaded index arrays.</param>
 private void  SeekEnum(SegmentTermEnum enumerator, int indexOffset)
 {
     long pointer = indexPointers[indexOffset];

     // One less than indexOffset * totalIndexInterval, computed in 64-bit arithmetic.
     long position = ((long)indexOffset * totalIndexInterval) - 1;

     enumerator.Seek(pointer, position, indexTerms[indexOffset], indexInfos[indexOffset]);
 }
Exemplo n.º 20
0
 /// <summary>
 /// Opens a segment's term dictionary and, unless <paramref name="indexDivisor"/> is -1,
 /// loads every indexDivisor-th entry of its terms index into the index arrays.
 /// </summary>
 /// <param name="dir">Directory the segment files are opened from.</param>
 /// <param name="seg">Segment name, used as the file-name prefix.</param>
 /// <param name="fis">Field infos handed to the enumerators.</param>
 /// <param name="readBufferSize">Buffer size passed to <c>OpenInput</c>.</param>
 /// <param name="indexDivisor">-1 to skip loading the terms index, otherwise a value of at least 1.</param>
 /// <exception cref="System.ArgumentException">indexDivisor is neither -1 nor greater than 0.</exception>
 internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
 {
     bool constructed = false;

     bool skipTermsIndex = indexDivisor == -1;
     if (!skipTermsIndex && indexDivisor < 1)
     {
         throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
     }

     try
     {
         directory = dir;
         segment = seg;
         fieldInfos = fis;

         System.String termsFileName = segment + "." + IndexFileNames.TERMS_EXTENSION;
         origEnum = new SegmentTermEnum(directory.OpenInput(termsFileName, readBufferSize), fieldInfos, false);
         size = origEnum.size;

         if (skipTermsIndex)
         {
             // Terms index is not loaded.
             totalIndexInterval = -1;
             indexTerms = null;
             indexInfos = null;
             indexPointers = null;
         }
         else
         {
             // Load the terms index, keeping one entry out of every indexDivisor.
             totalIndexInterval = origEnum.indexInterval * indexDivisor;
             System.String indexFileName = segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION;
             var indexEnum = new SegmentTermEnum(directory.OpenInput(indexFileName, readBufferSize), fieldInfos, true);

             try
             {
                 int sampledCount = 1 + ((int) indexEnum.size - 1) / indexDivisor;

                 indexTerms = new Term[sampledCount];
                 indexInfos = new TermInfo[sampledCount];
                 indexPointers = new long[sampledCount];

                 int target = 0;
                 while (indexEnum.Next())
                 {
                     indexTerms[target] = indexEnum.Term;
                     indexInfos[target] = indexEnum.TermInfo();
                     indexPointers[target] = indexEnum.indexPointer;
                     target++;

                     // Skip the next indexDivisor - 1 entries, stopping early if the enumerator runs out.
                     int step = 1;
                     while (step < indexDivisor && indexEnum.Next())
                     {
                         step++;
                     }
                 }
             }
             finally
             {
                 indexEnum.Close();
             }
         }

         constructed = true;
     }
     finally
     {
         // With lock-less commits a FileNotFound exception above is possible and fine.
         // Release whatever was opened right away rather than waiting for a GC to do it.
         if (!constructed)
         {
             Dispose();
         }
     }
 }
Exemplo n.º 21
0
 /// <summary>
 /// Seeks <paramref name="enumerator"/> to the loaded index entry at <paramref name="indexOffset"/>.
 /// </summary>
 /// <param name="enumerator">Enumerator to reposition.</param>
 /// <param name="indexOffset">Index into the loaded index arrays.</param>
 private void SeekEnum(SegmentTermEnum enumerator, int indexOffset)
 {
     long filePointer = indexPointers[indexOffset];

     // Widen to 64 bits before multiplying; the value handed to Seek is one less than the product.
     long termPosition = (long)indexOffset * totalIndexInterval - 1L;

     enumerator.Seek(filePointer, termPosition, indexTerms[indexOffset], indexInfos[indexOffset]);
 }
Exemplo n.º 22
0
		/// <summary>
		/// Reads all entries of <c>indexEnum</c> into the index arrays the first time it is
		/// called, then closes and clears <c>indexEnum</c>. Calls made while <c>indexTerms</c>
		/// is already set do nothing.
		/// </summary>
		private void  EnsureIndexIsRead()
		{
			lock (this)
			{
				bool alreadyRead = indexTerms != null;
				if (alreadyRead)
				{
					return;
				}

				try
				{
					int total = (int) indexEnum.size;

					indexTerms = new Term[total];
					indexInfos = new TermInfo[total];
					indexPointers = new long[total];

					int next = 0;
					while (indexEnum.Next())
					{
						indexTerms[next] = indexEnum.Term();
						indexInfos[next] = indexEnum.TermInfo();
						indexPointers[next] = indexEnum.indexPointer;
						next++;
					}
				}
				finally
				{
					// The index enumerator is released whether or not the read succeeded.
					indexEnum.Close();
					indexEnum = null;
				}
			}
		}
Exemplo n.º 23
0
        /// <summary>
        /// Advances <paramref name="enumerator"/> until its position is no longer below
        /// <paramref name="position"/> and returns the term it is then on.
        /// </summary>
        /// <param name="enumerator">Enumerator to advance.</param>
        /// <param name="position">Target position.</param>
        /// <returns>The enumerator's current term, or null if <c>Next()</c> returned false first.</returns>
        private Term ScanEnum(SegmentTermEnum enumerator, int position)
        {
            for (;;)
            {
                // Target reached (or already passed): hand back the current term.
                if (enumerator.position >= position)
                {
                    return enumerator.Term();
                }

                // Ran out of terms before reaching the target.
                if (!enumerator.Next())
                {
                    return null;
                }
            }
        }
Exemplo n.º 24
0
        // FIXME: OG: remove hard-coded file names
        /// <summary>
        /// Manual smoke test and timing run: reads one term per line from "words.txt", writes
        /// the terms with random TermInfo values to the "words" segment in "test.store", then
        /// reads them back by iteration and by lookup, checking every value and printing the
        /// elapsed time of each phase to the console.
        /// </summary>
        /// <exception cref="System.Exception">
        /// A term or value read back differs from what was written, or a lookup returns null.
        /// </exception>
        public static void  Test()
        {
            // DateTime.Ticks counts 100 ns units; divide by this to report real milliseconds.
            long ticksPerMillisecond = System.TimeSpan.TicksPerMillisecond;

            System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
            System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");

            System.DateTime start = System.DateTime.Now;

            System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            System.IO.FileStream         ws   = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
            System.IO.StreamReader       wr   = new System.IO.StreamReader(ws, System.Text.Encoding.Default);

            try
            {
                for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
                {
                    keys.Add(new Term("word", key));
                }
            }
            finally
            {
                // Release the word file even if reading fails.
                wr.Close();
            }

            System.DateTime end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMillisecond);
            System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");

            start = System.DateTime.Now;

            // Fixed seed so every run generates the same values.
            System.Random gen = new System.Random((System.Int32) 1251971);
            long          fp  = (gen.Next() & 0xF) + 1;
            long          pp  = (gen.Next() & 0xF) + 1;

            int[]  docFreqs     = new int[keys.Count];
            long[] freqPointers = new long[keys.Count];
            long[] proxPointers = new long[keys.Count];
            for (int i = 0; i < keys.Count; i++)
            {
                docFreqs[i]     = (gen.Next() & 0xF) + 1;
                freqPointers[i] = fp;
                proxPointers[i] = pp;
                // Pointers grow by a random step of 1..16 per term.
                fp += (gen.Next() & 0xF) + 1;
                pp += (gen.Next() & 0xF) + 1;
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMillisecond);
            System.Console.Out.WriteLine(" milliseconds to generate values");

            start = System.DateTime.Now;

            Directory  store = FSDirectory.GetDirectory("test.store", true);
            FieldInfos fis   = new FieldInfos();

            TermInfosWriter writer = new TermInfosWriter(store, "words", fis);

            fis.Add("word", false);

            for (int i = 0; i < keys.Count; i++)
            {
                writer.Add((Term)keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
            }

            writer.Close();

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMillisecond);
            System.Console.Out.WriteLine(" milliseconds to write table");

            System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");

            start = System.DateTime.Now;

            TermInfosReader reader = new TermInfosReader(store, "words", fis);

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMillisecond);
            System.Console.Out.WriteLine(" milliseconds to open table");

            start = System.DateTime.Now;

            // Phase 1: iterate the whole table and compare every entry with what was written.
            SegmentTermEnum enumerator = reader.Terms();

            for (int i = 0; i < keys.Count; i++)
            {
                enumerator.Next();
                Term key = (Term)keys[i];
                if (!key.Equals(enumerator.Term()))
                {
                    throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
                }
                TermInfo ti = enumerator.TermInfo();
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMillisecond);
            System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");

            start = System.DateTime.Now;

            // Phase 2: look every term up individually.
            for (int i = 0; i < keys.Count; i++)
            {
                Term     key = (Term)keys[i];
                TermInfo ti  = reader.Get(key);
                if (ti == null)
                {
                    // Report a failed lookup explicitly instead of failing on a null dereference.
                    throw new System.Exception("term not found: " + key + " at " + i);
                }
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / (float)ticksPerMillisecond / keys.Count);
            System.Console.Out.WriteLine(" average milliseconds per lookup");

            TermEnum e = reader.Terms(new Term("word", "azz"));

            System.Console.Out.WriteLine("Word after azz is " + e.Term().text);

            reader.Close();

            store.Close();
        }