示例#1
0
        /// <summary>
        /// Loads the terms index from <c>indexEnum</c> into <c>indexTerms</c>,
        /// <c>indexInfos</c> and <c>indexPointers</c> on the first call.
        /// Calls made while <c>indexTerms</c> is already non-null return without
        /// doing anything. The whole operation runs under a lock on this instance,
        /// and <c>indexEnum</c> is closed and set to null whether or not the read
        /// completes.
        /// </summary>
        private void EnsureIndexIsRead()
        {
            lock (this)
            {
                if (indexTerms == null)
                {
                    try
                    {
                        // The enum reports how many entries it holds; size all three arrays from it.
                        int entryCount = (int)indexEnum.size;

                        indexTerms    = new Term[entryCount];
                        indexInfos    = new TermInfo[entryCount];
                        indexPointers = new long[entryCount];

                        int slot = 0;
                        while (indexEnum.Next())
                        {
                            indexTerms[slot]    = indexEnum.Term();
                            indexInfos[slot]    = indexEnum.TermInfo();
                            indexPointers[slot] = indexEnum.indexPointer;
                            slot++;
                        }
                    }
                    finally
                    {
                        // The enum is only needed for this one pass.
                        indexEnum.Close();
                        indexEnum = null;
                    }
                }
            }
        }
示例#2
0
        /// <summary>
        /// Scans the enumeration returned by <c>GetEnum()</c> forward to
        /// <paramref name="term"/> and reports whether it landed on an exact match.
        /// </summary>
        /// <param name="term">Term to look for.</param>
        /// <returns>
        /// The enumeration's <c>TermInfo()</c> when its current term is non-null and
        /// compares equal to <paramref name="term"/>; otherwise <c>null</c>.
        /// </returns>
        private TermInfo ScanEnum(Term term)
        {
            SegmentTermEnum scanner = GetEnum();
            scanner.ScanTo(term);

            // Nothing under the cursor after the scan: no match.
            if (scanner.Term() == null)
            {
                return null;
            }

            // The scan stopped on a different term: no match.
            if (term.CompareTo(scanner.Term()) != 0)
            {
                return null;
            }

            return scanner.TermInfo();
        }
示例#3
0
        /// <summary>
        /// Resolves the current term of <paramref name="termEnum"/> together with its
        /// <c>TermInfo</c> and delegates to <c>Seek(TermInfo, Term)</c>.
        /// </summary>
        /// <param name="termEnum">Enumeration whose current term is used.</param>
        public virtual void Seek(TermEnum termEnum)
        {
            Term     currentTerm;
            TermInfo currentInfo;

            // Identical FieldInfos is the test for "termEnum belongs to the same
            // segment as this SegmentTermDocs".
            SegmentTermEnum sameSegmentEnum = termEnum as SegmentTermEnum;
            bool fromThisSegment = sameSegmentEnum != null && sameSegmentEnum.fieldInfos == parent.fieldInfos;

            if (fromThisSegment)
            {
                // optimized case: the enum already carries the TermInfo
                currentTerm = sameSegmentEnum.Term();
                currentInfo = sameSegmentEnum.TermInfo();
            }
            else
            {
                // punt case: look the term up in the parent's term infos
                currentTerm = termEnum.Term();
                currentInfo = parent.tis.Get(currentTerm);
            }

            Seek(currentInfo, currentTerm);
        }
示例#4
0
        /// <summary>
        /// Opens the terms file of a segment and, unless <paramref name="indexDivisor"/>
        /// is -1, reads the terms index file into memory, keeping one entry out of
        /// every <paramref name="indexDivisor"/>.
        /// </summary>
        /// <param name="dir">Directory the segment files are opened from.</param>
        /// <param name="seg">Segment name; the file extensions are appended to it.</param>
        /// <param name="fis">Field infos passed to the term enumerations.</param>
        /// <param name="readBufferSize">Buffer size passed to <c>OpenInput</c>.</param>
        /// <param name="indexDivisor">-1 to skip loading the terms index, otherwise a value of at least 1.</param>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="indexDivisor"/> is neither -1 nor greater than 0.
        /// </exception>
        internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
        {
            bool loadTermsIndex = indexDivisor != -1;

            if (loadTermsIndex && indexDivisor < 1)
            {
                throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
            }

            bool success = false;

            try
            {
                directory  = dir;
                segment    = seg;
                fieldInfos = fis;

                origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
                size     = origEnum.size;

                if (!loadTermsIndex)
                {
                    // Do not load terms index:
                    totalIndexInterval = -1;
                    indexTerms         = null;
                    indexInfos         = null;
                    indexPointers      = null;
                }
                else
                {
                    // Load terms index
                    totalIndexInterval = origEnum.indexInterval * indexDivisor;
                    SegmentTermEnum termsIndex = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);

                    try
                    {
                        // Number of entries kept when only every indexDivisor-th one is stored.
                        int keptEntries = 1 + ((int)termsIndex.size - 1) / indexDivisor;

                        indexTerms    = new Term[keptEntries];
                        indexInfos    = new TermInfo[keptEntries];
                        indexPointers = new long[keptEntries];

                        int slot = 0;
                        while (termsIndex.Next())
                        {
                            indexTerms[slot]    = termsIndex.Term();
                            indexInfos[slot]    = termsIndex.TermInfo();
                            indexPointers[slot] = termsIndex.indexPointer;

                            // Step over the entries that are not kept; stop early if the enum runs out.
                            int stepped = 1;
                            while (stepped < indexDivisor && termsIndex.Next())
                            {
                                stepped++;
                            }

                            slot++;
                        }
                    }
                    finally
                    {
                        termsIndex.Close();
                    }
                }

                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Close();
                }
            }
        }
示例#5
0
        /// <summary>
        /// Looks up the <c>TermInfo</c> stored for <paramref name="term"/>.
        /// </summary>
        /// <param name="term">Term to look up.</param>
        /// <param name="useCache">
        /// When true, the per-thread <c>termInfoCache</c> is checked first and is
        /// updated with entries found in the dictionary.
        /// </param>
        /// <returns>
        /// The matching <c>TermInfo</c>, or <c>null</c> when <c>size</c> is 0 or the
        /// term is not found.
        /// </returns>
        private TermInfo Get(Term term, bool useCache)
        {
            if (size == 0)
            {
                return null;
            }

            EnsureIndexIsRead();

            ThreadResources threadState = GetThreadResources();
            Lucene.Net.Util.Cache.Cache lookupCache = null;

            if (useCache)
            {
                lookupCache = threadState.termInfoCache;

                // check the cache first if the term was recently looked up
                TermInfo cached = (TermInfo)lookupCache.Get(term);
                if (cached != null)
                {
                    return cached;
                }
            }

            SegmentTermEnum cursor = threadState.termEnum;

            // optimize sequential access: decide whether the cached enum can simply
            // scan forward to the term without seeking
            bool scanWithoutSeek = false;
            if (cursor.Term() != null)
            {
                bool afterPrevious = cursor.Prev() != null && term.CompareTo(cursor.Prev()) > 0;
                if (afterPrevious || term.CompareTo(cursor.Term()) >= 0)
                {
                    int nextIndexSlot = (int)(cursor.position / totalIndexInterval) + 1;
                    scanWithoutSeek = indexTerms.Length == nextIndexSlot || term.CompareTo(indexTerms[nextIndexSlot]) < 0;
                }
            }

            bool cacheable;
            if (scanWithoutSeek)
            {
                // no need to seek
                int numScans = cursor.ScanTo(term);

                // we only want to put this TermInfo into the cache if the scan
                // skipped more than one dictionary entry. This prevents
                // RangeQueries or WildcardQueries from wiping out the cache when
                // they iterate over a large number of terms in order
                cacheable = numScans > 1;
            }
            else
            {
                // random-access: must seek
                SeekEnum(cursor, GetIndexOffset(term));
                cursor.ScanTo(term);
                cacheable = true;
            }

            if (cursor.Term() == null || term.CompareTo(cursor.Term()) != 0)
            {
                return null;
            }

            TermInfo found = cursor.TermInfo();
            if (lookupCache != null && cacheable)
            {
                lookupCache.Put(term, found);
            }

            return found;
        }
示例#6
0
 /// <summary>
 /// Opens the terms file of a segment and, unless <paramref name="indexDivisor"/>
 /// is -1, reads the terms index file into memory, keeping one entry out of
 /// every <paramref name="indexDivisor"/>.
 /// </summary>
 /// <param name="dir">Directory the segment files are opened from.</param>
 /// <param name="seg">Segment name; the file extensions are appended to it.</param>
 /// <param name="fis">Field infos passed to the term enumerations.</param>
 /// <param name="readBufferSize">Buffer size passed to <c>OpenInput</c>.</param>
 /// <param name="indexDivisor">-1 to skip loading the terms index, otherwise a value of at least 1.</param>
 /// <exception cref="System.ArgumentException">
 /// <paramref name="indexDivisor"/> is neither -1 nor greater than 0.
 /// </exception>
 internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
 {
     bool loadTermsIndex = indexDivisor != -1;

     if (loadTermsIndex && indexDivisor < 1)
     {
         throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
     }

     bool success = false;

     try
     {
         directory = dir;
         segment = seg;
         fieldInfos = fis;

         origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
         size = origEnum.size;

         if (!loadTermsIndex)
         {
             // Do not load terms index:
             totalIndexInterval = -1;
             indexTerms = null;
             indexInfos = null;
             indexPointers = null;
         }
         else
         {
             // Load terms index
             totalIndexInterval = origEnum.indexInterval * indexDivisor;
             SegmentTermEnum termsIndex = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);

             try
             {
                 // Number of entries kept when only every indexDivisor-th one is stored.
                 int keptEntries = 1 + ((int) termsIndex.size - 1) / indexDivisor;

                 indexTerms = new Term[keptEntries];
                 indexInfos = new TermInfo[keptEntries];
                 indexPointers = new long[keptEntries];

                 int slot = 0;
                 while (termsIndex.Next())
                 {
                     indexTerms[slot] = termsIndex.Term;
                     indexInfos[slot] = termsIndex.TermInfo();
                     indexPointers[slot] = termsIndex.indexPointer;

                     // Step over the entries that are not kept; stop early if the enum runs out.
                     int stepped = 1;
                     while (stepped < indexDivisor && termsIndex.Next())
                     {
                         stepped++;
                     }

                     slot++;
                 }
             }
             finally
             {
                 termsIndex.Close();
             }
         }

         success = true;
     }
     finally
     {
         // With lock-less commits, it's entirely possible (and
         // fine) to hit a FileNotFound exception above. In
         // this case, we want to explicitly close any subset
         // of things that were opened so that we don't have to
         // wait for a GC to do so.
         if (!success)
         {
             Dispose();
         }
     }
 }
示例#7
0
        // FIXME: OG: remove hard-coded file names
        /// <summary>
        /// Manual round-trip check: reads terms from "words.txt", writes them with
        /// generated values through a <c>TermInfosWriter</c> into "test.store", then
        /// reads them back through a <c>TermInfosReader</c> both sequentially and by
        /// lookup, printing the elapsed time of each phase to the console.
        /// </summary>
        /// <exception cref="System.Exception">
        /// A term or value read back differs from the one that was written.
        /// </exception>
        public static void  Test()
        {
            // DateTime.Ticks counts 100-nanosecond units, so tick differences must be
            // divided by this before being reported as milliseconds.
            long ticksPerMs = System.TimeSpan.TicksPerMillisecond;

            System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
            System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");

            System.DateTime start = System.DateTime.Now;

            System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            System.IO.FileStream         ws   = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
            System.IO.StreamReader       wr   = new System.IO.StreamReader(ws, System.Text.Encoding.Default);

            try
            {
                for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
                {
                    keys.Add(new Term("word", key));
                }
            }
            finally
            {
                // Close the reader even if reading fails part-way.
                wr.Close();
            }

            System.DateTime end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMs);
            System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");

            start = System.DateTime.Now;

            // Fixed seed so the generated values are the same on every run.
            System.Random gen = new System.Random((System.Int32) 1251971);
            long          fp  = (gen.Next() & 0xF) + 1;
            long          pp  = (gen.Next() & 0xF) + 1;

            int[]  docFreqs     = new int[keys.Count];
            long[] freqPointers = new long[keys.Count];
            long[] proxPointers = new long[keys.Count];
            for (int i = 0; i < keys.Count; i++)
            {
                docFreqs[i]     = (gen.Next() & 0xF) + 1;
                freqPointers[i] = fp;
                proxPointers[i] = pp;
                fp += (gen.Next() & 0xF) + 1;
                pp += (gen.Next() & 0xF) + 1;
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMs);
            System.Console.Out.WriteLine(" milliseconds to generate values");

            start = System.DateTime.Now;

            Directory store = FSDirectory.GetDirectory("test.store", true);
            try
            {
                FieldInfos fis = new FieldInfos();

                TermInfosWriter writer = new TermInfosWriter(store, "words", fis);

                fis.Add("word", false);

                for (int i = 0; i < keys.Count; i++)
                {
                    writer.Add((Term)keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
                }

                writer.Close();

                end = System.DateTime.Now;

                System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMs);
                System.Console.Out.WriteLine(" milliseconds to write table");

                System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");

                start = System.DateTime.Now;

                TermInfosReader reader = new TermInfosReader(store, "words", fis);
                try
                {
                    end = System.DateTime.Now;

                    System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMs);
                    System.Console.Out.WriteLine(" milliseconds to open table");

                    start = System.DateTime.Now;

                    // Sequential pass: every enumerated entry must match what was written, in order.
                    SegmentTermEnum enumerator = reader.Terms();

                    for (int i = 0; i < keys.Count; i++)
                    {
                        enumerator.Next();
                        Term key = (Term)keys[i];
                        if (!key.Equals(enumerator.Term()))
                        {
                            throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
                        }
                        TermInfo ti = enumerator.TermInfo();
                        if (ti.docFreq != docFreqs[i])
                        {
                            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                        }
                        if (ti.freqPointer != freqPointers[i])
                        {
                            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                        }
                        if (ti.proxPointer != proxPointers[i])
                        {
                            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                        }
                    }

                    end = System.DateTime.Now;

                    System.Console.Out.Write((end.Ticks - start.Ticks) / ticksPerMs);
                    System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");

                    start = System.DateTime.Now;

                    // Lookup pass: fetch each term individually and compare its values.
                    for (int i = 0; i < keys.Count; i++)
                    {
                        Term     key = (Term)keys[i];
                        TermInfo ti  = reader.Get(key);
                        if (ti.docFreq != docFreqs[i])
                        {
                            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                        }
                        if (ti.freqPointer != freqPointers[i])
                        {
                            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                        }
                        if (ti.proxPointer != proxPointers[i])
                        {
                            throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                        }
                    }

                    end = System.DateTime.Now;

                    // Floating-point division keeps sub-millisecond averages visible.
                    System.Console.Out.Write(((end.Ticks - start.Ticks) / (float)ticksPerMs) / keys.Count);
                    System.Console.Out.WriteLine(" average milliseconds per lookup");

                    TermEnum e = reader.Terms(new Term("word", "azz"));

                    System.Console.Out.WriteLine("Word after azz is " + e.Term().text);
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                store.Close();
            }
        }