internal override void Seek(TermInfo ti)
 {
     base.Seek(ti);
     if (ti != null)
         proxStream.Seek(ti.proxPointer);
     proxCount = 0;
 }
 internal void Set(TermInfo ti)
 {
     docFreq = ti.docFreq;
     freqPointer = ti.freqPointer;
     proxPointer = ti.proxPointer;
     skipOffset = ti.skipOffset;
 }
Ejemplo n.º 3
0
		internal override void  Seek(TermInfo ti)
		{
			base.Seek(ti);
			if (ti != null)
				lazySkipPointer = ti.proxPointer;
			
			lazySkipDocCount = 0;
			proxCount = 0;
		}
		internal override void  Seek(TermInfo ti, Term term)
		{
			base.Seek(ti, term);
			if (ti != null)
				lazySkipPointer = ti.proxPointer;
			
			lazySkipProxCount = 0;
			proxCount = 0;
			payloadLength = 0;
			needToLoadPayload = false;
		}
Ejemplo n.º 5
0
        internal override void  Seek(TermInfo ti, Term term)
        {
            base.Seek(ti, term);
            if (ti != null)
            {
                lazySkipPointer = ti.proxPointer;
            }

            lazySkipProxCount = 0;
            proxCount         = 0;
            payloadLength     = 0;
            needToLoadPayload = false;
        }
Ejemplo n.º 6
0
        public override int DocFreq(Term t)
        {
            TermInfo ti = tis.Get(t);

            if (ti != null)
            {
                return(ti.docFreq);
            }
            else
            {
                return(0);
            }
        }
Ejemplo n.º 7
0
        internal override void  Seek(TermInfo ti, Term term, IState state)
        {
            base.Seek(ti, term, state);
            if (ti.IsEmpty == false)
            {
                lazySkipPointer = ti.proxPointer;
            }

            lazySkipProxCount = 0;
            proxCount         = 0;
            payloadLength     = 0;
            needToLoadPayload = false;
        }
Ejemplo n.º 8
0
        internal void  Add(Term term, TermInfo ti)
        {
            int length = term.text.Length;

            if (termTextBuffer.Length < length)
            {
                termTextBuffer = new char[(int)(length * 1.25)];
            }

            int i = 0;

            System.Collections.Generic.IEnumerator <char> chars = term.text.GetEnumerator();
            while (chars.MoveNext())
            {
                termTextBuffer[i++] = (char)chars.Current;
            }

            Add(fieldInfos.FieldNumber(term.field), termTextBuffer, 0, length, ti);
        }
Ejemplo n.º 9
0
 internal virtual void  Seek(TermInfo ti)
 {
     count = 0;
     if (ti == null)
     {
         df = 0;
     }
     else
     {
         df          = ti.docFreq;
         doc         = 0;
         skipDoc     = 0;
         skipCount   = 0;
         numSkips    = df / skipInterval;
         freqPointer = ti.freqPointer;
         proxPointer = ti.proxPointer;
         skipPointer = freqPointer + ti.skipOffset;
         freqStream.Seek(freqPointer);
         haveSkipped = false;
     }
 }
Ejemplo n.º 10
0
        internal virtual void  Seek(TermInfo ti, Term term)
        {
            count = 0;
            FieldInfo fi = parent.fieldInfos.FieldInfo(term.field);

            currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
            if (ti == null)
            {
                df = 0;
            }
            else
            {
                df              = ti.docFreq;
                doc             = 0;
                freqBasePointer = ti.freqPointer;
                proxBasePointer = ti.proxPointer;
                skipPointer     = freqBasePointer + ti.skipOffset;
                freqStream.Seek(freqBasePointer);
                haveSkipped = false;
            }
        }
Ejemplo n.º 11
0
		internal virtual void  Seek(TermInfo ti)
		{
			count = 0;
			if (ti == null)
			{
				df = 0;
			}
			else
			{
				df = ti.docFreq;
				doc = 0;
				skipDoc = 0;
				skipCount = 0;
				numSkips = df / skipInterval;
				freqPointer = ti.freqPointer;
				proxPointer = ti.proxPointer;
				skipPointer = freqPointer + ti.skipOffset;
				freqStream.Seek(freqPointer);
				haveSkipped = false;
			}
		}
Ejemplo n.º 12
0
        internal virtual void  Seek(TermInfo ti, Term term, IState state)
        {
            count = 0;
            FieldInfo fi = parent.core.fieldInfos.FieldInfo(term.Field);

            currentFieldOmitTermFreqAndPositions = (fi != null) && fi.omitTermFreqAndPositions;
            currentFieldStoresPayloads           = (fi != null) && fi.storePayloads;
            if (ti.IsEmpty)
            {
                df = 0;
            }
            else
            {
                df              = ti.docFreq;
                doc             = 0;
                freqBasePointer = ti.freqPointer;
                proxBasePointer = ti.proxPointer;
                skipPointer     = freqBasePointer + ti.skipOffset;
                freqStream.Seek(freqBasePointer, state);
                haveSkipped = false;
            }
        }
Ejemplo n.º 13
0
        /// <summary>Adds a new <Term, TermInfo> pair to the set.
        /// Term must be lexicographically greater than all previous Terms added.
        /// TermInfo pointers must be positive and greater than all previous.
        /// </summary>
        public /*internal*/ void  Add(Term term, TermInfo ti)
        {
            if (!isIndex && term.CompareTo(lastTerm) <= 0)
            {
                throw new System.IO.IOException("term out of order");
            }
            if (ti.freqPointer < lastTi.freqPointer)
            {
                throw new System.IO.IOException("freqPointer out of order");
            }
            if (ti.proxPointer < lastTi.proxPointer)
            {
                throw new System.IO.IOException("proxPointer out of order");
            }

            if (!isIndex && size % indexInterval == 0)
            {
                other.Add(lastTerm, lastTi);                        // add an index term
            }
            WriteTerm(term);                                        // write term
            output.WriteVInt(ti.docFreq);                           // write doc freq
            output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
            output.WriteVLong(ti.proxPointer - lastTi.proxPointer);

            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
                lastIndexPointer = other.output.GetFilePointer();                 // write pointer
            }

            lastTi.Set(ti);
            size++;
        }
Ejemplo n.º 14
0
        /// <summary>Adds a new &lt;fieldNumber, termBytes&gt;, TermInfo> pair to the set.
        /// Term must be lexicographically greater than all previous Terms added.
        /// TermInfo pointers must be positive and greater than all previous.
        /// </summary>
        internal void  Add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti)
        {
            System.Diagnostics.Debug.Assert(CompareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 ||
                                            (isIndex && termBytesLength == 0 && lastTermBytesLength == 0),
                                            "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) + " (number " + fieldNumber + ")" +
                                            " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" +
                                            " text=" + System.Text.Encoding.UTF8.GetString(termBytes, 0, termBytesLength) + " lastText=" + System.Text.Encoding.UTF8.GetString(lastTermBytes, 0, lastTermBytesLength));

            System.Diagnostics.Debug.Assert(ti.freqPointer >= lastTi.freqPointer, "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
            System.Diagnostics.Debug.Assert(ti.proxPointer >= lastTi.proxPointer, "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");

            if (!isIndex && size % indexInterval == 0)
            {
                other.Add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term
            }
            WriteTerm(fieldNumber, termBytes, termBytesLength);                         // write term

            output.WriteVInt(ti.docFreq);                                               // write doc freq
            output.WriteVLong(ti.freqPointer - lastTi.freqPointer);                     // write pointers
            output.WriteVLong(ti.proxPointer - lastTi.proxPointer);

            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                output.WriteVLong(other.output.FilePointer - lastIndexPointer);
                lastIndexPointer = other.output.FilePointer;                 // write pointer
            }

            lastFieldNumber = fieldNumber;
            lastTi.Set(ti);
            size++;
        }
Ejemplo n.º 15
0
		private void  WritePostings(Posting[] postings, System.String segment)
		{
			IndexOutput freq = null, prox = null;
			TermInfosWriter tis = null;
			TermVectorsWriter termVectorWriter = null;
			try
			{
				//open files for inverse index storage
				freq = directory.CreateOutput(segment + ".frq");
				prox = directory.CreateOutput(segment + ".prx");
				tis = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
				TermInfo ti = new TermInfo();
				System.String currentField = null;
				
				for (int i = 0; i < postings.Length; i++)
				{
					Posting posting = postings[i];
					
					// add an entry to the dictionary with pointers to prox and freq files
					ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), - 1);
					tis.Add(posting.term, ti);
					
					// add an entry to the freq file
					int postingFreq = posting.freq;
					if (postingFreq == 1)
					// optimize freq=1
						freq.WriteVInt(1);
					// set low bit of doc num.
					else
					{
						freq.WriteVInt(0); // the document number
						freq.WriteVInt(postingFreq); // frequency in doc
					}
					
					int lastPosition = 0; // write positions
					int[] positions = posting.positions;
					for (int j = 0; j < postingFreq; j++)
					{
						// use delta-encoding
						int position = positions[j];
						prox.WriteVInt(position - lastPosition);
						lastPosition = position;
					}
					// check to see if we switched to a new field
					System.String termField = posting.term.Field();
					if (currentField != termField)
					{
						// changing field - see if there is something to save
						currentField = termField;
						FieldInfo fi = fieldInfos.FieldInfo(currentField);
						if (fi.storeTermVector)
						{
							if (termVectorWriter == null)
							{
								termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
								termVectorWriter.OpenDocument();
							}
							termVectorWriter.OpenField(currentField);
						}
						else if (termVectorWriter != null)
						{
							termVectorWriter.CloseField();
						}
					}
					if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
					{
						termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
					}
				}
				if (termVectorWriter != null)
					termVectorWriter.CloseDocument();
			}
			finally
			{
				// make an effort to close all streams we can but remember and re-throw
				// the first exception encountered in this process
				System.IO.IOException keep = null;
				if (freq != null)
					try
					{
						freq.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (prox != null)
					try
					{
						prox.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (tis != null)
					try
					{
						tis.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (termVectorWriter != null)
					try
					{
						termVectorWriter.Close();
					}
					catch (System.IO.IOException e)
					{
						if (keep == null)
							keep = e;
					}
				if (keep != null)
				{
					throw new System.IO.IOException(keep.StackTrace);
				}
			}
		}
Ejemplo n.º 16
0
 /// <summary>Sets the argument to the current TermInfo in the enumeration.
 /// Initially invalid, valid after next() called for the first time.
 /// </summary>
 internal void  TermInfo(TermInfo ti)
 {
     ti.Set(termInfo);
 }
Ejemplo n.º 17
0
		/// <summary>Sets the argument to the current TermInfo in the enumeration.
		/// Initially invalid, valid after next() called for the first time.
		/// </summary>
		internal void  TermInfo(TermInfo ti)
		{
			ti.Set(termInfo);
		}
Ejemplo n.º 18
0
		internal void  Seek(long pointer, int p, Term t, TermInfo ti)
		{
			input.Seek(pointer);
			position = p;
			termBuffer.Set(t);
			prevBuffer.Reset();
			termInfo.Set(ti);
		}
Ejemplo n.º 19
0
 internal void  Add(Term term, TermInfo ti)
 {
     UnicodeUtil.UTF16toUTF8(term.Text, 0, term.Text.Length, utf8Result);
     Add(fieldInfos.FieldNumber(term.Field), utf8Result.result, utf8Result.length, ti);
 }
Ejemplo n.º 20
0
        /// <summary>Adds a new <<fieldNumber, termText>, TermInfo> pair to the set.
        /// Term must be lexicographically greater than all previous Terms added.
        /// TermInfo pointers must be positive and greater than all previous.
        /// </summary>
        internal void  Add(int fieldNumber, char[] termText, int termTextStart, int termTextLength, TermInfo ti)
        {
            System.Diagnostics.Debug.Assert(CompareToLastTerm(fieldNumber, termText, termTextStart, termTextLength) < 0 ||
                                            (isIndex && termTextLength == 0 && lastTermTextLength == 0),
                                            "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) + "(number " + fieldNumber + ")" +
                                            " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" +
                                            " text=" + new String(termText, termTextStart, termTextLength) + " lastText=" + new String(lastTermText, 0, lastTermTextLength));

            System.Diagnostics.Debug.Assert(ti.freqPointer >= lastTi.freqPointer, "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
            System.Diagnostics.Debug.Assert(ti.proxPointer >= lastTi.proxPointer, "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");

            if (!isIndex && size % indexInterval == 0)
            {
                other.Add(lastFieldNumber, lastTermText, 0, lastTermTextLength, lastTi); // add an index term
            }
            WriteTerm(fieldNumber, termText, termTextStart, termTextLength);             // write term

            output.WriteVInt(ti.docFreq);                                                // write doc freq
            output.WriteVLong(ti.freqPointer - lastTi.freqPointer);                      // write pointers
            output.WriteVLong(ti.proxPointer - lastTi.proxPointer);

            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
                lastIndexPointer = other.output.GetFilePointer();                 // write pointer
            }

            if (lastTermText.Length < termTextLength)
            {
                lastTermText = new char[(int)(termTextLength * 1.25)];
            }
            Array.Copy(termText, termTextStart, lastTermText, 0, termTextLength);
            lastTermTextLength = termTextLength;
            lastFieldNumber    = fieldNumber;

            lastTi.Set(ti);
            size++;
        }
        /// <summary>Adds a new <Term, TermInfo> pair to the set.
        /// Term must be lexicographically greater than all previous Terms added.
        /// TermInfo pointers must be positive and greater than all previous.
        /// </summary>
        /*internal*/
        public void Add(Term term, TermInfo ti)
        {
            if (!isIndex && term.CompareTo(lastTerm) <= 0)
            {
                throw new System.IO.IOException("term out of order (\"" + term + "\".compareTo(\"" + lastTerm + "\") <= 0)");
            }
            if (ti.freqPointer < lastTi.freqPointer)
                throw new System.IO.IOException("freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
            if (ti.proxPointer < lastTi.proxPointer)
                throw new System.IO.IOException("proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");

            if (!isIndex && size % indexInterval == 0)
                other.Add(lastTerm, lastTi); // add an index term

            WriteTerm(term); // write term
            output.WriteVInt(ti.docFreq); // write doc freq
            output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
            output.WriteVLong(ti.proxPointer - lastTi.proxPointer);

            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
                lastIndexPointer = other.output.GetFilePointer(); // write pointer
            }

            lastTi.Set(ti);
            size++;
        }
Ejemplo n.º 22
0
		internal void  Add(Term term, TermInfo ti)
		{
			
			int length = term.text.Length;
			if (termTextBuffer.Length < length)
			{
				termTextBuffer = new char[(int) (length * 1.25)];
			}

            int i = 0;
            System.Collections.Generic.IEnumerator<char> chars = term.text.GetEnumerator();
            while (chars.MoveNext())
            {
                termTextBuffer[i++] = (char)chars.Current;
            }
			
			Add(fieldInfos.FieldNumber(term.field), termTextBuffer, 0, length, ti);
		}
Ejemplo n.º 23
0
        public virtual void  Seek(Term term)
        {
            TermInfo ti = parent.tis.Get(term);

            Seek(ti);
        }
Ejemplo n.º 24
0
        // FIXME: OG: remove hard-coded file names
        public static void  Test()
        {
            System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
            System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");

            System.DateTime start = System.DateTime.Now;

            System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            System.IO.FileStream         ws   = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
            System.IO.StreamReader       wr   = new System.IO.StreamReader(new System.IO.StreamReader(ws, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(ws, System.Text.Encoding.Default).CurrentEncoding);

            for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
            {
                keys.Add(new Term("word", key));
            }
            wr.Close();

            System.DateTime end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");

            start = System.DateTime.Now;

            System.Random gen = new System.Random((System.Int32) 1251971);
            long          fp  = (gen.Next() & 0xF) + 1;
            long          pp  = (gen.Next() & 0xF) + 1;

            int[]  docFreqs     = new int[keys.Count];
            long[] freqPointers = new long[keys.Count];
            long[] proxPointers = new long[keys.Count];
            for (int i = 0; i < keys.Count; i++)
            {
                docFreqs[i]     = (gen.Next() & 0xF) + 1;
                freqPointers[i] = fp;
                proxPointers[i] = pp;
                fp += (gen.Next() & 0xF) + 1;
                ;
                pp += (gen.Next() & 0xF) + 1;
                ;
            }

            end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to generate values");

            start = System.DateTime.Now;

            Directory  store = FSDirectory.GetDirectory("test.store", true);
            FieldInfos fis   = new FieldInfos();

            TermInfosWriter writer = new TermInfosWriter(store, "words", fis);

            fis.Add("word", false);

            for (int i = 0; i < keys.Count; i++)
            {
                writer.Add((Term)keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
            }

            writer.Close();

            end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to write table");

            System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");

            start = System.DateTime.Now;

            TermInfosReader reader = new TermInfosReader(store, "words", fis);

            end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to open table");

            start = System.DateTime.Now;

            SegmentTermEnum enumerator = reader.Terms();

            for (int i = 0; i < keys.Count; i++)
            {
                enumerator.Next();
                Term key = (Term)keys[i];
                if (!key.Equals(enumerator.Term()))
                {
                    throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
                }
                TermInfo ti = enumerator.TermInfo();
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write(end.Ticks - start.Ticks);
            System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");

            start = System.DateTime.Now;

            for (int i = 0; i < keys.Count; i++)
            {
                Term     key = (Term)keys[i];
                TermInfo ti  = reader.Get(key);
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end.Ticks - start.Ticks) / (float)keys.Count);
            System.Console.Out.WriteLine(" average milliseconds per lookup");

            TermEnum e = reader.Terms(new Term("word", "azz"));

            System.Console.Out.WriteLine("Word after azz is " + e.Term().text);

            reader.Close();

            store.Close();
        }
Ejemplo n.º 25
0
		/// <summary>Adds a new <<fieldNumber, termText>, TermInfo> pair to the set.
		/// Term must be lexicographically greater than all previous Terms added.
		/// TermInfo pointers must be positive and greater than all previous.
		/// </summary>
		internal void  Add(int fieldNumber, char[] termText, int termTextStart, int termTextLength, TermInfo ti)
		{
			
			System.Diagnostics.Debug.Assert(CompareToLastTerm(fieldNumber, termText, termTextStart, termTextLength) < 0 ||
				(isIndex && termTextLength == 0 && lastTermTextLength == 0),
				"Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) +  "(number " + fieldNumber + ")" + 
				" lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" + 
				" text=" + new String(termText, termTextStart, termTextLength) + " lastText=" + new String(lastTermText, 0, lastTermTextLength));
			
			System.Diagnostics.Debug.Assert(ti.freqPointer >= lastTi.freqPointer, "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
			System.Diagnostics.Debug.Assert(ti.proxPointer >= lastTi.proxPointer, "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");
			
			if (!isIndex && size % indexInterval == 0)
				other.Add(lastFieldNumber, lastTermText, 0, lastTermTextLength, lastTi); // add an index term
			
			WriteTerm(fieldNumber, termText, termTextStart, termTextLength); // write term
			
			output.WriteVInt(ti.docFreq); // write doc freq
			output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
			output.WriteVLong(ti.proxPointer - lastTi.proxPointer);
			
			if (ti.docFreq >= skipInterval)
			{
				output.WriteVInt(ti.skipOffset);
			}
			
			if (isIndex)
			{
				output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
				lastIndexPointer = other.output.GetFilePointer(); // write pointer
			}
			
			if (lastTermText.Length < termTextLength)
			{
				lastTermText = new char[(int) (termTextLength * 1.25)];
			}
			Array.Copy(termText, termTextStart, lastTermText, 0, termTextLength);
			lastTermTextLength = termTextLength;
			lastFieldNumber = fieldNumber;
			
			lastTi.Set(ti);
			size++;
		}
Ejemplo n.º 26
0
        /// <summary>Adds a new <<fieldNumber, termBytes>, TermInfo> pair to the set.
        /// Term must be lexicographically greater than all previous Terms added.
        /// TermInfo pointers must be positive and greater than all previous.
        /// </summary>
        internal void Add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti)
        {
            System.Diagnostics.Debug.Assert(CompareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 ||
                (isIndex && termBytesLength == 0 && lastTermBytesLength == 0),
                "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) +  "(number " + fieldNumber + ")" +
                " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" +
                " text=" + System.Text.Encoding.UTF8.GetString(termBytes, 0, termBytesLength) + " lastText=" + System.Text.Encoding.UTF8.GetString(lastTermBytes, 0, lastTermBytesLength));

            System.Diagnostics.Debug.Assert(ti.freqPointer >= lastTi.freqPointer, "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
            System.Diagnostics.Debug.Assert(ti.proxPointer >= lastTi.proxPointer, "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");

            if (!isIndex && size % indexInterval == 0)
                other.Add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term

            WriteTerm(fieldNumber, termBytes, termBytesLength); // write term

            output.WriteVInt(ti.docFreq); // write doc freq
            output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
            output.WriteVLong(ti.proxPointer - lastTi.proxPointer);

            if (ti.docFreq >= skipInterval)
            {
                output.WriteVInt(ti.skipOffset);
            }

            if (isIndex)
            {
                output.WriteVLong(other.output.GetFilePointer() - lastIndexPointer);
                lastIndexPointer = other.output.GetFilePointer(); // write pointer
            }

            lastFieldNumber = fieldNumber;
            lastTi.Set(ti);
            size++;
        }
Ejemplo n.º 27
0
 internal void Add(Term term, TermInfo ti)
 {
     UnicodeUtil.UTF16toUTF8(term.text, 0, term.text.Length, utf8Result);
     Add(fieldInfos.FieldNumber(term.field), utf8Result.result, utf8Result.length, ti);
 }
Ejemplo n.º 28
0
        private void  WritePostings(Posting[] postings, System.String segment)
        {
            IndexOutput       freq = null, prox = null;
            TermInfosWriter   tis              = null;
            TermVectorsWriter termVectorWriter = null;

            try
            {
                //open files for inverse index storage
                freq = directory.CreateOutput(segment + ".frq");
                prox = directory.CreateOutput(segment + ".prx");
                tis  = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
                TermInfo      ti           = new TermInfo();
                System.String currentField = null;

                for (int i = 0; i < postings.Length; i++)
                {
                    Posting posting = postings[i];

                    // add an entry to the dictionary with pointers to prox and freq files
                    ti.Set(1, freq.GetFilePointer(), prox.GetFilePointer(), -1);
                    tis.Add(posting.term, ti);

                    // add an entry to the freq file
                    int postingFreq = posting.freq;
                    if (postingFreq == 1)
                    {
                        // optimize freq=1
                        freq.WriteVInt(1);
                    }
                    // set low bit of doc num.
                    else
                    {
                        freq.WriteVInt(0);                         // the document number
                        freq.WriteVInt(postingFreq);               // frequency in doc
                    }

                    int   lastPosition = 0;                   // write positions
                    int[] positions    = posting.positions;
                    for (int j = 0; j < postingFreq; j++)
                    {
                        // use delta-encoding
                        int position = positions[j];
                        prox.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                    // check to see if we switched to a new field
                    System.String termField = posting.term.Field();
                    if (currentField != termField)
                    {
                        // changing field - see if there is something to save
                        currentField = termField;
                        FieldInfo fi = fieldInfos.FieldInfo(currentField);
                        if (fi.storeTermVector)
                        {
                            if (termVectorWriter == null)
                            {
                                termVectorWriter = new TermVectorsWriter(directory, segment, fieldInfos);
                                termVectorWriter.OpenDocument();
                            }
                            termVectorWriter.OpenField(currentField);
                        }
                        else if (termVectorWriter != null)
                        {
                            termVectorWriter.CloseField();
                        }
                    }
                    if (termVectorWriter != null && termVectorWriter.IsFieldOpen())
                    {
                        termVectorWriter.AddTerm(posting.term.Text(), postingFreq, posting.positions, posting.offsets);
                    }
                }
                if (termVectorWriter != null)
                {
                    termVectorWriter.CloseDocument();
                }
            }
            finally
            {
                // make an effort to close all streams we can but remember and re-throw
                // the first exception encountered in this process
                System.IO.IOException keep = null;
                if (freq != null)
                {
                    try
                    {
                        freq.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (prox != null)
                {
                    try
                    {
                        prox.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (tis != null)
                {
                    try
                    {
                        tis.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (termVectorWriter != null)
                {
                    try
                    {
                        termVectorWriter.Close();
                    }
                    catch (System.IO.IOException e)
                    {
                        if (keep == null)
                        {
                            keep = e;
                        }
                    }
                }
                if (keep != null)
                {
                    throw new System.IO.IOException(keep.StackTrace);
                }
            }
        }
Ejemplo n.º 29
0
 internal virtual void Seek(TermInfo ti, Term term)
 {
     count = 0;
     FieldInfo fi = parent.fieldInfos.FieldInfo(term.field);
     currentFieldOmitTf = (fi != null) ? fi.omitTf : false;
     currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
     if (ti == null)
     {
         df = 0;
     }
     else
     {
         df = ti.docFreq;
         doc = 0;
         freqBasePointer = ti.freqPointer;
         proxBasePointer = ti.proxPointer;
         skipPointer = freqBasePointer + ti.skipOffset;
         freqStream.Seek(freqBasePointer);
         haveSkipped = false;
     }
 }
Ejemplo n.º 30
0
        public virtual void  Seek(Term term)
        {
            TermInfo ti = parent.core.GetTermsReader().Get(term);

            Seek(ti, term);
        }