Example #1
        public /*internal*/ void  AddDocument(System.String segment, Document doc)
        {
            // write field names
            fieldInfos = new FieldInfos();
            fieldInfos.Add(doc);
            fieldInfos.Write(directory, segment + ".fnm");

            // write field values
            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            try
            {
                fieldsWriter.AddDocument(doc);
            }
            finally
            {
                fieldsWriter.Close();
            }

            // invert doc into postingTable
            postingTable.Clear();                        // clear postingTable
            fieldLengths   = new int[fieldInfos.Size()]; // init fieldLengths
            fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
            fieldOffsets   = new int[fieldInfos.Size()]; // init fieldOffsets

            fieldBoosts = new float[fieldInfos.Size()];  // init fieldBoosts
            float boost = doc.GetBoost();

            for (int i = 0; i < fieldBoosts.Length; i++)
            {
                fieldBoosts[i] = boost;
            }

            InvertDocument(doc);

            // sort postingTable into an array
            Posting[] postings = SortPostingTable();

            /*
             * for (int i = 0; i < postings.length; i++) {
             * Posting posting = postings[i];
             * System.out.print(posting.term);
             * System.out.print(" freq=" + posting.freq);
             * System.out.print(" pos=");
             * System.out.print(posting.positions[0]);
             * for (int j = 1; j < posting.freq; j++)
             * System.out.print("," + posting.positions[j]);
             * System.out.println("");
             * }
             */

            // write postings
            WritePostings(postings, segment);

            // write norms of indexed fields
            WriteNorms(segment);
        }
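
The call every example here shares is FieldInfos.Write: the document's field names are gathered into a FieldInfos and persisted as the segment's .fnm file before the field values, postings, and norms are written. Pulled out on its own, and assuming the same Lucene.Net-era API used above (RAMDirectory, Field.Index.TOKENIZED), that first step looks roughly like this:

            // Illustrative sketch only; the directory and segment name ("_0") are placeholders.
            Directory directory = new RAMDirectory();
            Document doc = new Document();
            doc.Add(new Field("contents", "some text", Field.Store.YES, Field.Index.TOKENIZED));

            FieldInfos fieldInfos = new FieldInfos();
            fieldInfos.Add(doc);                        // register every field name in the document
            fieldInfos.Write(directory, "_0" + ".fnm"); // write field metadata for segment "_0"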
Example #2
		public void  AddDocument(System.String segment, Document doc)
		{
			// write field names
			fieldInfos = new FieldInfos();
			fieldInfos.Add(doc);
			fieldInfos.Write(directory, segment + ".fnm");
			
			// write field values
			FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
			try
			{
				fieldsWriter.AddDocument(doc);
			}
			finally
			{
				fieldsWriter.Close();
			}
			
			// invert doc into postingTable
			postingTable.Clear(); // clear postingTable
			fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths
			fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
			fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets
			
			fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts
			float boost = doc.GetBoost();
			for (int i = 0; i < fieldBoosts.Length; i++)
			{
				fieldBoosts[i] = boost;
			}
			
			InvertDocument(doc);
			
			// sort postingTable into an array
			Posting[] postings = SortPostingTable();
			
			/*
			for (int i = 0; i < postings.length; i++) {
			Posting posting = postings[i];
			System.out.print(posting.term);
			System.out.print(" freq=" + posting.freq);
			System.out.print(" pos=");
			System.out.print(posting.positions[0]);
			for (int j = 1; j < posting.freq; j++)
			System.out.print("," + posting.positions[j]);
			System.out.println("");
			}
			*/
			
			// write postings
			WritePostings(postings, segment);
			
			// write norms of indexed fields
			WriteNorms(segment);
		}
Example #3
        public virtual void  Test()
        {
            //Positive test of FieldInfos
            Assert.IsTrue(testDoc != null);
            FieldInfos fieldInfos = new FieldInfos();

            fieldInfos.Add(testDoc);
            //Since the complement is stored as well in the fields map
            Assert.IsTrue(fieldInfos.Size() == DocHelper.all.Count);             //this is all b/c we are using the no-arg constructor
            RAMDirectory dir = new RAMDirectory();

            System.String name   = "testFile";
            IndexOutput   output = dir.CreateOutput(name, null);

            Assert.IsTrue(output != null);
            //Use a RAMOutputStream

            try
            {
                fieldInfos.Write(output);
                output.Close();
                Assert.IsTrue(output.Length > 0);
                FieldInfos readIn = new FieldInfos(dir, name, null);
                Assert.IsTrue(fieldInfos.Size() == readIn.Size());
                FieldInfo info = readIn.FieldInfo("textField1");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector_ForNUnit == false);
                Assert.IsTrue(info.omitNorms_ForNUnit == false);

                info = readIn.FieldInfo("textField2");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector_ForNUnit == true);
                Assert.IsTrue(info.omitNorms_ForNUnit == false);

                info = readIn.FieldInfo("textField3");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector_ForNUnit == false);
                Assert.IsTrue(info.omitNorms_ForNUnit == true);

                info = readIn.FieldInfo("omitNorms");
                Assert.IsTrue(info != null);
                Assert.IsTrue(info.storeTermVector_ForNUnit == false);
                Assert.IsTrue(info.omitNorms_ForNUnit == true);

                dir.Close();
            }
            catch (System.IO.IOException)
            {
                Assert.IsTrue(false);
            }
        }
Example #4
		public virtual void  Test()
		{
			//Positive test of FieldInfos
			Assert.IsTrue(testDoc != null);
			FieldInfos fieldInfos = new FieldInfos();
			fieldInfos.Add(testDoc);
			//Since the complement is stored as well in the fields map
			Assert.IsTrue(fieldInfos.Size() == DocHelper.all.Count); //this is all b/c we are using the no-arg constructor
			RAMDirectory dir = new RAMDirectory();
			System.String name = "testFile";
			IndexOutput output = dir.CreateOutput(name);
			Assert.IsTrue(output != null);
			//Use a RAMOutputStream
			
			try
			{
				fieldInfos.Write(output);
				output.Close();
				Assert.IsTrue(output.Length() > 0);
				FieldInfos readIn = new FieldInfos(dir, name);
				Assert.IsTrue(fieldInfos.Size() == readIn.Size());
				FieldInfo info = readIn.FieldInfo("textField1");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == false);
				Assert.IsTrue(info.omitNorms_ForNUnit == false);
				
				info = readIn.FieldInfo("textField2");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == true);
				Assert.IsTrue(info.omitNorms_ForNUnit == false);
				
				info = readIn.FieldInfo("textField3");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == false);
				Assert.IsTrue(info.omitNorms_ForNUnit == true);
				
				info = readIn.FieldInfo("omitNorms");
				Assert.IsTrue(info != null);
				Assert.IsTrue(info.storeTermVector_ForNUnit == false);
				Assert.IsTrue(info.omitNorms_ForNUnit == true);
				
				dir.Close();
			}
			catch (System.IO.IOException e)
			{
				Assert.IsTrue(false);
			}
		}
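
Both versions of this test rely on a testDoc prepared elsewhere by DocHelper, with fields named textField1, textField2, textField3, and omitNorms. A hypothetical stand-in document that would satisfy the term-vector and omit-norms assertions above could be built like this (the field values and options are assumptions, not DocHelper's actual contents):

            // Hypothetical replacement for DocHelper's document; the options mirror the flags
            // the assertions expect (term vectors only on textField2, norms omitted on the last two).
            Document testDoc = new Document();
            testDoc.Add(new Field("textField1", "field one", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
            testDoc.Add(new Field("textField2", "field two", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            Field textField3 = new Field("textField3", "field three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);
            textField3.SetOmitNorms(true);
            testDoc.Add(textField3);
            testDoc.Add(new Field("omitNorms", "no norms here", Field.Store.YES, Field.Index.NO_NORMS));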
Example #5
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
            // in  merge mode, we use this FieldSelector
            FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
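
AddIndexed itself is not shown in these snippets; in the corresponding Lucene.Net sources it simply registers each returned field name with the requested term-vector flags. A rough sketch, assuming the multi-flag FieldInfos.Add overload and IndexReader.HasNorms available in this era (illustrative, not a verbatim copy):

        // Sketch of the AddIndexed helper called above.
        private void AddIndexed(IndexReader reader, FieldInfos fieldInfos, System.Collections.IEnumerable names,
                                bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
        {
            foreach (System.String field in names)
            {
                // every field name is indexed; norms are kept only if the source reader has them
                fieldInfos.Add(field, true, storeTermVectors, storePositionWithTermVector,
                               storeOffsetWithTermVector, !reader.HasNorms(field));
            }
        }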
Example #6
        public override void Flush(IDictionary <DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state)
        {
            Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > childThreadsAndFields = new Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> >();
            foreach (DocFieldProcessorPerThread perThread in threads.Keys)
            {
                childThreadsAndFields[perThread.consumer] = perThread.Fields();
                perThread.TrimFields(state);
            }
            fieldsWriter.Flush(state);
            consumer.Flush(childThreadsAndFields, state);

            // Important to save after asking consumer to flush so
            // consumer can alter the FieldInfo* if necessary.  EG,
            // FreqProxTermsWriter does this with
            // FieldInfo.storePayload.
            System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            fieldInfos.Write(state.directory, fileName);
            state.flushedFiles.Add(fileName);
        }
Example #7
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos();             // merge field names
            int docCount = 0;

            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader)readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader)readers[i];
                    int         maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                    {
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j));
                            docCount++;
                        }
                    }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return(docCount);
        }
Example #8
        internal override void Flush(ICollection <object> threads, DocumentsWriter.FlushState state)
        {
            IDictionary <object, ICollection <object> > childThreadsAndFields = new Dictionary <object, ICollection <object> >();
            IEnumerator <object> it = threads.GetEnumerator();

            while (it.MoveNext())
            {
                DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread)it.Current;
                childThreadsAndFields[perThread.consumer] = perThread.Fields();
                perThread.trimFields(state);
            }

            consumer.flush(childThreadsAndFields, state);

            // Important to save after asking consumer to flush so
            // consumer can alter the FieldInfo* if necessary.  EG,
            // FreqProxTermsWriter does this with
            // FieldInfo.storePayload.
            fieldInfos.Write(state.directory, state.segmentName + ".fnm");
        }
Example #9
        public override void  Flush(System.Collections.ICollection threads, SegmentWriteState state)
        {
            System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();
            System.Collections.IEnumerator it = threads.GetEnumerator();
            while (it.MoveNext())
            {
                DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread)((System.Collections.DictionaryEntry)it.Current).Key;
                childThreadsAndFields[perThread.consumer] = perThread.Fields();
                perThread.TrimFields(state);
            }
            fieldsWriter.Flush(state);
            consumer.Flush(childThreadsAndFields, state);

            // Important to save after asking consumer to flush so
            // consumer can alter the FieldInfo* if necessary.  EG,
            // FreqProxTermsWriter does this with
            // FieldInfo.storePayload.
            System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            fieldInfos.Write(state.directory, fileName);
            SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName);
        }
Example #10
        /// <summary> </summary>
        /// <returns> The number of documents in all of the readers
        /// </returns>
        /// <throws>  IOException </throws>
        private int MergeFields()
        {
            fieldInfos = new FieldInfos(); // merge field names
            int docCount = 0;
            for (int i = 0; i < readers.Count; i++)
            {
                IndexReader reader = (IndexReader) readers[i];
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
                AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
                fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
            }
            fieldInfos.Write(directory, segment + ".fnm");

            FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
            try
            {
                for (int i = 0; i < readers.Count; i++)
                {
                    IndexReader reader = (IndexReader) readers[i];
                    int maxDoc = reader.MaxDoc();
                    for (int j = 0; j < maxDoc; j++)
                        if (!reader.IsDeleted(j))
                        {
                            // skip deleted docs
                            fieldsWriter.AddDocument(reader.Document(j));
                            docCount++;
                        }
                }
            }
            finally
            {
                fieldsWriter.Close();
            }
            return docCount;
        }
Example #11
		/// <summary> </summary>
		/// <returns> The number of documents in all of the readers
		/// </returns>
		/// <throws>  IOException </throws>
		private int MergeFields()
		{
			fieldInfos = new FieldInfos(); // merge field names
			int docCount = 0;
			for (int i = 0; i < readers.Count; i++)
			{
				IndexReader reader = (IndexReader) readers[i];
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
				AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
				fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
			}
			fieldInfos.Write(directory, segment + ".fnm");
			
			FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
			
			// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
			// in  merge mode, we use this FieldSelector
			FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
			
			try
			{
				for (int i = 0; i < readers.Count; i++)
				{
					IndexReader reader = (IndexReader) readers[i];
					int maxDoc = reader.MaxDoc();
					for (int j = 0; j < maxDoc; j++)
						if (!reader.IsDeleted(j))
						{
							// skip deleted docs
							fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
							docCount++;
						}
				}
			}
			finally
			{
				fieldsWriter.Close();
			}
			return docCount;
		}
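
Whichever MergeFields variant runs, the merged field metadata ends up in segment + ".fnm", written by the same FieldInfos.Write call as in the single-document examples. A hedged sketch of reading it back to inspect the merged field names, using the FieldInfos read constructor from the tests above (FieldName(int) and Size() are assumed to be exposed by this era's FieldInfos):

            FieldInfos merged = new FieldInfos(directory, segment + ".fnm");
            for (int i = 0; i < merged.Size(); i++)
            {
                System.Console.WriteLine(merged.FieldName(i));   // print each merged field name
            }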