Example #1
		public void  AddDocument(System.String segment, Document doc)
		{
			// write field names
			fieldInfos = new FieldInfos();
			fieldInfos.Add(doc);
			fieldInfos.Write(directory, segment + ".fnm");
			
			// write field values
			FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
			try
			{
				fieldsWriter.AddDocument(doc);
			}
			finally
			{
				fieldsWriter.Close();
			}
			
			// invert doc into postingTable
			postingTable.Clear(); // clear postingTable
			fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths
			fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
			fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets
			
			fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts
			float boost = doc.GetBoost();
			for (int i = 0; i < fieldBoosts.Length; i++)
			{
				fieldBoosts[i] = boost;
			}
			
			InvertDocument(doc);
			
			// sort postingTable into an array
			Posting[] postings = SortPostingTable();
			
			/*
			// debug dump of the sorted postings (commented out)
			for (int i = 0; i < postings.Length; i++)
			{
				Posting posting = postings[i];
				System.Console.Out.Write(posting.term);
				System.Console.Out.Write(" freq=" + posting.freq);
				System.Console.Out.Write(" pos=");
				System.Console.Out.Write(posting.positions[0]);
				for (int j = 1; j < posting.freq; j++)
					System.Console.Out.Write("," + posting.positions[j]);
				System.Console.Out.WriteLine("");
			}
			*/
			
			// write postings
			WritePostings(postings, segment);
			
			// write norms of indexed fields
			WriteNorms(segment);
		}
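A minimal driver for the AddDocument above might look like the following sketch. It is not taken from the Lucene.Net sources: it assumes the DocumentWriter constructor shown in Example #4, the 2.x-era Field(name, value, Store, Index) overload, and the usual Lucene.Net.Documents/Analysis/Index/Search/Store using directives; the segment name and field text are made up for illustration.

		// Hypothetical usage sketch, not from the Lucene.Net sources.
		RAMDirectory dir = new RAMDirectory();
		Document doc = new Document();
		doc.Add(new Field("contents", "aaa bbb ccc", Field.Store.YES, Field.Index.TOKENIZED));

		// DocumentWriter(Directory, Analyzer, Similarity, maxFieldLength), as constructed in Example #4
		DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(), Similarity.GetDefault(), 50);
		writer.AddDocument("_test0", doc); // writes _test0.fnm plus the stored fields, postings and norms for this one-document segment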
Example #2
		public override void SetUp()
		{
			base.SetUp();
			fieldInfos = new FieldInfos();
			DocHelper.SetupDoc(testDoc);
			fieldInfos.Add(testDoc);
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetUseCompoundFile(false);
			writer.AddDocument(testDoc);
			writer.Close();
			segmentName = writer.NewestSegment().name;
		}
Example #3
		public virtual void  Test()
		{
			//Positive test of FieldInfos
			Assert.IsTrue(testDoc != null);
			FieldInfos fieldInfos = new FieldInfos();
			fieldInfos.Add(testDoc);
			//Since the complement is stored as well in the fields map
			Assert.IsTrue(fieldInfos.Size() == DocHelper.all.Count); //this is all b/c we are using the no-arg constructor
			RAMDirectory dir = new RAMDirectory();
			System.String name = "testFile";
			IndexOutput output = dir.CreateOutput(name);
			Assert.IsTrue(output != null);
			//Use a RAMOutputStream
			
			try
			{
				fieldInfos.Write(output);
				output.Close();
				Assert.IsTrue(output.Length() > 0);
				FieldInfos readIn = new FieldInfos(dir, name);
				Assert.IsTrue(fieldInfos.Size() == readIn.Size());
				FieldInfo info = readIn.FieldInfo("textField1");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == false);
				Assert.IsTrue(info.omitNorms_ForNUnit == false);
				
				info = readIn.FieldInfo("textField2");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == true);
				Assert.IsTrue(info.omitNorms_ForNUnit == false);
				
				info = readIn.FieldInfo("textField3");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == false);
				Assert.IsTrue(info.omitNorms_ForNUnit == true);
				
				info = readIn.FieldInfo("omitNorms");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == false);
				Assert.IsTrue(info.omitNorms_ForNUnit == true);
				
				dir.Close();
			}
			catch (System.IO.IOException e)
			{
				Assert.Fail(e.ToString());
			}
		}
Example #4
		protected virtual void  SetUp()
		{
			fieldInfos = new FieldInfos();
			DocHelper.SetupDoc(testDoc);
			fieldInfos.Add(testDoc);
			DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(), Similarity.GetDefault(), 50);
			Assert.IsTrue(writer != null);
			try
			{
				writer.AddDocument("test", testDoc);
			}
			catch (System.IO.IOException e)
			{
				Assert.Fail(e.ToString());
			}
		}
Example #5
        public virtual void  Test()
        {
            //Positive test of FieldInfos
            Assert.IsTrue(testDoc != null);
            FieldInfos fieldInfos = new FieldInfos();

            fieldInfos.Add(testDoc);
            //Since the complement is stored as well in the fields map
            Assert.IsTrue(fieldInfos.Size() == 7);             //this is 7 b/c we are using the no-arg constructor
            RAMDirectory dir = new RAMDirectory();

            System.String name   = "testFile";
            OutputStream  output = dir.CreateFile(name);

            Assert.IsTrue(output != null);
            //Use a RAMOutputStream

            try
            {
                fieldInfos.Write(output);
                output.Close();
                Assert.IsTrue(output.Length() > 0);
                FieldInfos readIn = new FieldInfos(dir, name);
                Assert.IsTrue(fieldInfos.Size() == readIn.Size());
                FieldInfo info = readIn.FieldInfo("textField1");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector == false);

                info = readIn.FieldInfo("textField2");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector == true);

                dir.Close();
            }
            catch (System.IO.IOException e)
            {
                Assert.Fail(e.ToString());
            }
        }
Example #6
		public override void  SetUp()
		{
			base.SetUp();
			fieldInfos = new FieldInfos();
			DocHelper.SetupDoc(testDoc);
			fieldInfos.Add(testDoc);
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.UseCompoundFile = false;
			writer.AddDocument(testDoc);
			writer.Close();
		}
Example #7
        private void AddIndexed(IndexReader reader, FieldInfos fInfos, System.Collections.Generic.ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
		{
			System.Collections.Generic.IEnumerator<string> i = names.GetEnumerator();
			while (i.MoveNext())
			{
                System.String field = i.Current;
				fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads, omitTFAndPositions);
			}
		}
Example #8
 private void AddIndexed(IndexReader reader, FieldInfos fInfos, ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
 {
     foreach (var field in names)
     {
         fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector,
                    !reader.HasNorms(field), storePayloads, omitTFAndPositions);
     }
 }
Example #9
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos(); // merge field names
            int docCount = 0;
            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader) readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader) readers[i];
                    int maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j));
                            docCount++;
                        }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return docCount;
        }
Example #10
 private void AddIndexed(IndexReader reader, FieldInfos fieldInfos, System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
 {
     System.Collections.IEnumerator i = names.GetEnumerator();
     while (i.MoveNext())
     {
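         // the names collection enumerates as DictionaryEntry items here (e.g. a Hashtable
         // keyed by field name), so the field name is taken from the entry's Key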
         System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current;
         System.String field = (System.String) e.Key;
         fieldInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field));
     }
 }
Example #11
        // FIXME: OG: remove hard-coded file names
        public static void  Test()
        {
            System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
            System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");

            System.DateTime start = System.DateTime.Now;

            System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            System.IO.FileStream         ws   = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
            System.IO.StreamReader       wr   = new System.IO.StreamReader(ws, System.Text.Encoding.Default);

            for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
            {
                keys.Add(new Term("word", key));
            }
            wr.Close();

            System.DateTime end = System.DateTime.Now;

            System.Console.Out.Write((end - start).TotalMilliseconds);
            System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");

            start = System.DateTime.Now;

            System.Random gen = new System.Random((System.Int32) 1251971);
            long          fp  = (gen.Next() & 0xF) + 1;
            long          pp  = (gen.Next() & 0xF) + 1;

            int[]  docFreqs     = new int[keys.Count];
            long[] freqPointers = new long[keys.Count];
            long[] proxPointers = new long[keys.Count];
            for (int i = 0; i < keys.Count; i++)
            {
                docFreqs[i]     = (gen.Next() & 0xF) + 1;
                freqPointers[i] = fp;
                proxPointers[i] = pp;
                fp += (gen.Next() & 0xF) + 1;
                pp += (gen.Next() & 0xF) + 1;
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end - start).TotalMilliseconds);
            System.Console.Out.WriteLine(" milliseconds to generate values");

            start = System.DateTime.Now;

            Directory  store = FSDirectory.GetDirectory("test.store", true);
            FieldInfos fis   = new FieldInfos();

            TermInfosWriter writer = new TermInfosWriter(store, "words", fis);

            fis.Add("word", false);

            for (int i = 0; i < keys.Count; i++)
            {
                writer.Add((Term)keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
            }

            writer.Close();

            end = System.DateTime.Now;

            System.Console.Out.Write((end - start).TotalMilliseconds);
            System.Console.Out.WriteLine(" milliseconds to write table");

            System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");

            start = System.DateTime.Now;

            TermInfosReader reader = new TermInfosReader(store, "words", fis);

            end = System.DateTime.Now;

            System.Console.Out.Write((end - start).TotalMilliseconds);
            System.Console.Out.WriteLine(" milliseconds to open table");

            start = System.DateTime.Now;

            SegmentTermEnum enumerator = reader.Terms();

            for (int i = 0; i < keys.Count; i++)
            {
                enumerator.Next();
                Term key = (Term)keys[i];
                if (!key.Equals(enumerator.Term()))
                {
                    throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
                }
                TermInfo ti = enumerator.TermInfo();
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end - start).TotalMilliseconds);
            System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");

            start = System.DateTime.Now;

            for (int i = 0; i < keys.Count; i++)
            {
                Term     key = (Term)keys[i];
                TermInfo ti  = reader.Get(key);
                if (ti.docFreq != docFreqs[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
                }
                if (ti.freqPointer != freqPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
                }
                if (ti.proxPointer != proxPointers[i])
                {
                    throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
                }
            }

            end = System.DateTime.Now;

            System.Console.Out.Write((end - start).TotalMilliseconds / keys.Count);
            System.Console.Out.WriteLine(" average milliseconds per lookup");

            TermEnum e = reader.Terms(new Term("word", "azz"));

            System.Console.Out.WriteLine("Word after azz is " + e.Term().text);

            reader.Close();

            store.Close();
        }
Example #12
		// FIXME: OG: remove hard-coded file names
		public static void  Test()
		{
			
			System.IO.FileInfo file = new System.IO.FileInfo("words.txt");
			System.Console.Out.WriteLine(" reading word file containing " + file.Length + " bytes");
			
			System.DateTime start = System.DateTime.Now;
			
			System.Collections.ArrayList keys = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			System.IO.FileStream ws = new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
			System.IO.StreamReader wr = new System.IO.StreamReader(ws, System.Text.Encoding.Default);
			
			for (System.String key = wr.ReadLine(); key != null; key = wr.ReadLine())
				keys.Add(new Term("word", key));
			wr.Close();
			
			System.DateTime end = System.DateTime.Now;
			
			System.Console.Out.Write((end - start).TotalMilliseconds);
			System.Console.Out.WriteLine(" milliseconds to read " + keys.Count + " words");
			
			start = System.DateTime.Now;
			
			System.Random gen = new System.Random((System.Int32) 1251971);
			long fp = (gen.Next() & 0xF) + 1;
			long pp = (gen.Next() & 0xF) + 1;
			int[] docFreqs = new int[keys.Count];
			long[] freqPointers = new long[keys.Count];
			long[] proxPointers = new long[keys.Count];
			for (int i = 0; i < keys.Count; i++)
			{
				docFreqs[i] = (gen.Next() & 0xF) + 1;
				freqPointers[i] = fp;
				proxPointers[i] = pp;
				fp += (gen.Next() & 0xF) + 1;
				pp += (gen.Next() & 0xF) + 1;
			}
			
			end = System.DateTime.Now;
			
			System.Console.Out.Write((end - start).TotalMilliseconds);
			System.Console.Out.WriteLine(" milliseconds to generate values");
			
			start = System.DateTime.Now;
			
			Directory store = FSDirectory.GetDirectory("test.store", true);
			FieldInfos fis = new FieldInfos();
			
			TermInfosWriter writer = new TermInfosWriter(store, "words", fis);
			fis.Add("word", false);
			
			for (int i = 0; i < keys.Count; i++)
				writer.Add((Term) keys[i], new TermInfo(docFreqs[i], freqPointers[i], proxPointers[i]));
			
			writer.Close();
			
			end = System.DateTime.Now;
			
			System.Console.Out.Write((end - start).TotalMilliseconds);
			System.Console.Out.WriteLine(" milliseconds to write table");
			
			System.Console.Out.WriteLine(" table occupies " + store.FileLength("words.tis") + " bytes");
			
			start = System.DateTime.Now;
			
			TermInfosReader reader = new TermInfosReader(store, "words", fis);
			
			end = System.DateTime.Now;
			
			System.Console.Out.Write((end - start).TotalMilliseconds);
			System.Console.Out.WriteLine(" milliseconds to open table");
			
			start = System.DateTime.Now;
			
			SegmentTermEnum enumerator = reader.Terms();
			for (int i = 0; i < keys.Count; i++)
			{
				enumerator.Next();
				Term key = (Term) keys[i];
				if (!key.Equals(enumerator.Term()))
				{
					throw new System.Exception("wrong term: " + enumerator.Term() + ", expected: " + key + " at " + i);
				}
				TermInfo ti = enumerator.TermInfo();
				if (ti.docFreq != docFreqs[i])
					throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
				if (ti.freqPointer != freqPointers[i])
					throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
				if (ti.proxPointer != proxPointers[i])
					throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
			}
			
			end = System.DateTime.Now;
			
			System.Console.Out.Write((end - start).TotalMilliseconds);
			System.Console.Out.WriteLine(" milliseconds to iterate over " + keys.Count + " words");
			
			start = System.DateTime.Now;
			
			for (int i = 0; i < keys.Count; i++)
			{
				Term key = (Term) keys[i];
				TermInfo ti = reader.Get(key);
				if (ti.docFreq != docFreqs[i])
					throw new System.Exception("wrong value: " + System.Convert.ToString(ti.docFreq, 16) + ", expected: " + System.Convert.ToString(docFreqs[i], 16) + " at " + i);
				if (ti.freqPointer != freqPointers[i])
					throw new System.Exception("wrong value: " + System.Convert.ToString(ti.freqPointer, 16) + ", expected: " + System.Convert.ToString(freqPointers[i], 16) + " at " + i);
				if (ti.proxPointer != proxPointers[i])
					throw new System.Exception("wrong value: " + System.Convert.ToString(ti.proxPointer, 16) + ", expected: " + System.Convert.ToString(proxPointers[i], 16) + " at " + i);
			}
			
			end = System.DateTime.Now;
			
			System.Console.Out.Write((end - start).TotalMilliseconds / keys.Count);
			System.Console.Out.WriteLine(" average milliseconds per lookup");
			
			TermEnum e = reader.Terms(new Term("word", "azz"));
			System.Console.Out.WriteLine("Word after azz is " + e.Term().text);
			
			reader.Close();
			
			store.Close();
		}
Example #13
        internal override DocumentsWriter.DocWriter processDocument()
        {
            consumer.startDocument();
            Document doc = docState.doc;

            System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

            fieldCount = 0;

            int thisFieldGen = fieldGen++;

            System.Collections.IList docFields = doc.GetFields();
            int numDocFields = docFields.Count;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            for (int i = 0; i < numDocFields; i++)
            {
                Fieldable field     = (Fieldable)docFields[i];
                string    fieldName = field.Name();

                // Make sure we have a PerField allocated
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed(), field.IsTermVectorStored(),
                                                  field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(),
                                                  field.GetOmitNorms(), false, field.GetOmitTf());

                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        rehash();
                    }
                }
                else
                {
                    fp.fieldInfo.update(field.IsIndexed(), field.IsTermVectorStored(),
                                        field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(),
                                        field.GetOmitNorms(), false, field.GetOmitTf());
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        System.Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;
                }

                if (fp.fieldCount == fp.fields.Length)
                {
                    Fieldable[] newArray = new Fieldable[fp.fields.Length * 2];
                    System.Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
                    fp.fields = newArray;
                }

                fp.fields[fp.fieldCount++] = field;
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            quickSort(fields, 0, fieldCount - 1);

            for (int i = 0; i < fieldCount; i++)
            {
                fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
            }

            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
            }

            return(consumer.finishDocument());
        }
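The loop above keys all per-field state off a small chained hash table (fieldHash, indexed through hashMask) and doubles it once it is half full. The sketch below is a stripped-down, hypothetical illustration of that lookup-or-insert-then-rehash pattern; PerFieldTable and PerField are made-up names standing in for the real DocFieldProcessorPerField machinery.

        // Hypothetical, simplified illustration of the fieldHash pattern above:
        // chained buckets keyed by field name, doubled once the table is half full.
        // Not part of the Lucene.Net sources.
        internal class PerFieldTable
        {
            internal sealed class PerField
            {
                internal string name;
                internal PerField next; // collision chain within one bucket
            }

            private PerField[] buckets = new PerField[2]; // length stays a power of two
            private int count;

            internal PerField GetOrAdd(string fieldName)
            {
                int hashPos = fieldName.GetHashCode() & (buckets.Length - 1);
                PerField fp = buckets[hashPos];
                while (fp != null && !fp.name.Equals(fieldName))
                    fp = fp.next;

                if (fp == null)
                {
                    // first time this field is seen: link a new entry at the head of the bucket
                    fp = new PerField { name = fieldName, next = buckets[hashPos] };
                    buckets[hashPos] = fp;
                    if (++count >= buckets.Length / 2)
                        Rehash(); // mirrors the totalFieldCount >= fieldHash.Length / 2 check above
                }
                return fp;
            }

            private void Rehash()
            {
                PerField[] newBuckets = new PerField[buckets.Length * 2];
                foreach (PerField head in buckets)
                {
                    PerField fp = head;
                    while (fp != null)
                    {
                        PerField next = fp.next;
                        int pos = fp.name.GetHashCode() & (newBuckets.Length - 1);
                        fp.next = newBuckets[pos];
                        newBuckets[pos] = fp;
                        fp = next;
                    }
                }
                buckets = newBuckets;
            }
        }

Keeping the load factor below one half keeps the collision chains short, which matters because this lookup runs once per field occurrence of every document that is indexed.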
Example #14
		/// <summary> </summary>
		/// <returns> The number of documents in all of the readers
		/// </returns>
		/// <throws>  IOException </throws>
		private int MergeFields()
		{
			fieldInfos = new FieldInfos(); // merge field names
			int docCount = 0;
			for (int i = 0; i < readers.Count; i++)
			{
				IndexReader reader = (IndexReader) readers[i];
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
				fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
			}
			fieldInfos.Write(directory, segment + ".fnm");
			
			FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
			
			// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
			// in  merge mode, we use this FieldSelector
			FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
			
			try
			{
				for (int i = 0; i < readers.Count; i++)
				{
					IndexReader reader = (IndexReader) readers[i];
					int maxDoc = reader.MaxDoc();
					for (int j = 0; j < maxDoc; j++)
						if (!reader.IsDeleted(j))
						{
							// skip deleted docs
							fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
							docCount++;
						}
				}
			}
			finally
			{
				fieldsWriter.Close();
			}
			return docCount;
		}