예제 #1
0
        // FIXME: OG: remove hard-coded file names
        public static void  Test()
        {
            System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
            System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");

            System.DateTime start = System.DateTime.Now;

            System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            System.IO.FileStream         ws   = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
            System.IO.StreamReader       wr   = new System.IO.StreamReader(new System.IO.StreamReader(ws, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(ws, System.Text.Encoding.Default).CurrentEncoding);

            for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
            {
                keys.Add(new Term("word", key));
            }
            wr.Close();

            System.DateTime end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");

            start = System.DateTime.Now;

            System.Random gen = new System.Random((System.Int32) 1251971);
            long          fp  = (gen.Next() & 0xF) + 1;
            long          pp  = (gen.Next() & 0xF) + 1;

            int[]  docFreqs     = new int[keys.Count];
            long[] freqPointers = new long[keys.Count];
            long[] proxPointers = new long[keys.Count];
            for (int i = 0; i < keys.Count; i++)
            {
                docFreqs[i]     = (gen.Next() & 0xF) + 1;
                freqPointers[i] = fp;
                proxPointers[i] = pp;
                fp += (gen.Next() & 0xF) + 1;
                ;
                pp += (gen.Next() & 0xF) + 1;
                ;
            }

            end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to generate values");

            start = System.DateTime.Now;

            Directory  store = FSDirectory.GetDirectory("test.store", true);
            FieldInfos fis   = new FieldInfos();

            TermInfosWriter writer = new TermInfosWriter(store, "words", fis);

            fis.Add("word", false);

            for (int i = 0; i < keys.Count; i++)
            {
                writer.Add((Term)keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
            }

            writer.Close();

            end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to write table");

            System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");

            start = System.DateTime.Now;

            TermInfosReader reader = new TermInfosReader(store, "words", fis);

            end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to open table");

            start = System.DateTime.Now;

            SegmentTermEnum enumerator = reader.Terms();

            for (int i = 0; i < keys.Count; i++)
            {
                enumerator.Next();
                Term key = (Term)keys[i];
                if (!key.Equals(enumerator.Term()))
                {
                    throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
                }
                TermInfo ti = enumerator.TermInfo();
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");

            start = System.DateTime.Now;

            for (int i = 0; i < keys.Count; i++)
            {
                Term     key = (Term)keys[i];
                TermInfo ti  = reader.Get(key);
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / (float)keys.Count);
            System.Console.Out.WriteLine(" average milliseconds per lookup");

            TermEnum e = reader.Terms(new Term("word", "azz"));

            System.Console.Out.WriteLine("Word after azz is " + e.Term().text);

            reader.Close();

            store.Close();
        }
예제 #2
0
        internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
        {
            bool success = false;

            if (indexDivisor < 1 && indexDivisor != -1)
            {
                throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
            }

            try
            {
                directory  = dir;
                segment    = seg;
                fieldInfos = fis;

                origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
                size     = origEnum.size;


                if (indexDivisor != -1)
                {
                    // Load terms index
                    totalIndexInterval = origEnum.indexInterval * indexDivisor;
                    SegmentTermEnum indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);

                    try
                    {
                        int indexSize = 1 + ((int)indexEnum.size - 1) / indexDivisor;                          // otherwise read index

                        indexTerms    = new Term[indexSize];
                        indexInfos    = new TermInfo[indexSize];
                        indexPointers = new long[indexSize];

                        for (int i = 0; indexEnum.Next(); i++)
                        {
                            indexTerms[i]    = indexEnum.Term();
                            indexInfos[i]    = indexEnum.TermInfo();
                            indexPointers[i] = indexEnum.indexPointer;

                            for (int j = 1; j < indexDivisor; j++)
                            {
                                if (!indexEnum.Next())
                                {
                                    break;
                                }
                            }
                        }
                    }
                    finally
                    {
                        indexEnum.Close();
                    }
                }
                else
                {
                    // Do not load terms index:
                    totalIndexInterval = -1;
                    indexTerms         = null;
                    indexInfos         = null;
                    indexPointers      = null;
                }
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Close();
                }
            }
        }
예제 #3
0
		internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
		{
			bool success = false;
			
			if (indexDivisor < 1 && indexDivisor != - 1)
			{
				throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
			}
			
			try
			{
				directory = dir;
				segment = seg;
				fieldInfos = fis;
				
				origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
				size = origEnum.size;
				
				
				if (indexDivisor != - 1)
				{
					// Load terms index
					totalIndexInterval = origEnum.indexInterval * indexDivisor;
					SegmentTermEnum indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);
					
					try
					{
						int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index
						
						indexTerms = new Term[indexSize];
						indexInfos = new TermInfo[indexSize];
						indexPointers = new long[indexSize];
						
						for (int i = 0; indexEnum.Next(); i++)
						{
							indexTerms[i] = indexEnum.Term();
							indexInfos[i] = indexEnum.TermInfo();
							indexPointers[i] = indexEnum.indexPointer;
							
							for (int j = 1; j < indexDivisor; j++)
								if (!indexEnum.Next())
									break;
						}
					}
					finally
					{
						indexEnum.Close();
					}
				}
				else
				{
					// Do not load terms index:
					totalIndexInterval = - 1;
					indexTerms = null;
					indexInfos = null;
					indexPointers = null;
				}
				success = true;
			}
			finally
			{
				// With lock-less commits, it's entirely possible (and
				// fine) to hit a FileNotFound exception above. In
				// this case, we want to explicitly close any subset
				// of things that were opened so that we don't have to
				// wait for a GC to do so.
				if (!success)
				{
					Close();
				}
			}
		}
예제 #4
0
        /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
        private TermInfo Get(Term term, bool useCache)
        {
            if (size == 0)
            {
                return(null);
            }

            EnsureIndexIsRead();

            TermInfo        ti;
            ThreadResources resources = GetThreadResources();

            Lucene.Net.Util.Cache.Cache cache = null;

            if (useCache)
            {
                cache = resources.termInfoCache;
                // check the cache first if the term was recently looked up
                ti = (TermInfo)cache.Get(term);
                if (ti != null)
                {
                    return(ti);
                }
            }

            // optimize sequential access: first try scanning cached enum w/o seeking
            SegmentTermEnum enumerator = resources.termEnum;

            if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
            {
                int enumOffset = (int)(enumerator.position / totalIndexInterval) + 1;
                if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
                {
                    // no need to seek

                    int numScans = enumerator.ScanTo(term);
                    if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
                    {
                        ti = enumerator.TermInfo();
                        if (cache != null && numScans > 1)
                        {
                            // we only  want to put this TermInfo into the cache if
                            // scanEnum skipped more than one dictionary entry.
                            // This prevents RangeQueries or WildcardQueries to
                            // wipe out the cache when they iterate over a large numbers
                            // of terms in order
                            cache.Put(term, ti);
                        }
                    }
                    else
                    {
                        ti = null;
                    }

                    return(ti);
                }
            }

            // random-access: must seek
            SeekEnum(enumerator, GetIndexOffset(term));
            enumerator.ScanTo(term);
            if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
            {
                ti = enumerator.TermInfo();
                if (cache != null)
                {
                    cache.Put(term, ti);
                }
            }
            else
            {
                ti = null;
            }
            return(ti);
        }
예제 #5
0
        private Term ScanEnum(SegmentTermEnum enumerator, int position)
        {
            while (enumerator.position < position)
                if (!enumerator.Next())
                    return null;

            return enumerator.Term();
        }