Example #1
0
 /// <summary>Builds a FieldInfo entry for every field of the given document,
 /// recording each field's name, indexed flag, and term-vector flag.
 /// </summary>
 public void  Add(Document doc)
 {
     foreach (Field docField in doc.Fields())
     {
         Add(docField.Name(), docField.IsIndexed(), docField.IsTermVectorStored());
     }
 }
Example #2
0
        /// <summary>Writes the stored fields of a document to the fields
        /// stream and records its starting file pointer in the index stream.
        /// Layout per document: stored-field count, then for each stored
        /// field its field number, a flags byte (bit 0 = tokenized), and its
        /// string value.
        /// </summary>
        internal void  AddDocument(Document doc)
        {
            // Remember where this document's stored fields begin.
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            // First pass: count stored fields so the count precedes the data.
            int numStored = 0;
            foreach (Field candidate in doc.Fields())
            {
                if (candidate.IsStored())
                    numStored++;
            }
            fieldsStream.WriteVInt(numStored);

            // Second pass: emit each stored field.
            foreach (Field stored in doc.Fields())
            {
                if (!stored.IsStored())
                    continue;

                fieldsStream.WriteVInt(fieldInfos.FieldNumber(stored.Name()));

                byte flags = 0;
                if (stored.IsTokenized())
                    flags |= 1;
                fieldsStream.WriteByte(flags);

                fieldsStream.WriteString(stored.StringValue());
            }
        }
Example #3
0
        // Tokenizes the fields of a document into Postings.
        //
        // For each indexed field: an un-tokenized field contributes its whole
        // string value as a single term, while a tokenized field is run
        // through the analyzer and each token is added at its computed
        // position.  Running per-field length, position, and boost totals are
        // kept in fieldLengths / fieldPositions / fieldBoosts (keyed by field
        // number), so multiple field instances with the same name accumulate.
        private void  InvertDocument(Document doc)
        {
            foreach (Field field in doc.Fields())
            {
                System.String fieldName   = field.Name();
                int           fieldNumber = fieldInfos.FieldNumber(fieldName);

                int length   = fieldLengths[fieldNumber];   // length of Field
                int position = fieldPositions[fieldNumber]; // position in Field

                if (field.IsIndexed())
                {
                    if (!field.IsTokenized())
                    {
                        // un-tokenized Field: index the raw string as one term
                        AddPosition(fieldName, field.StringValue(), position++);
                        length++;
                    }
                    else
                    {
                        System.IO.TextReader reader; // find or make Reader
                        if (field.ReaderValue() != null)
                        {
                            reader = field.ReaderValue();
                        }
                        else if (field.StringValue() != null)
                        {
                            reader = new System.IO.StringReader(field.StringValue());
                        }
                        else
                        {
                            // A tokenized field must carry text in some form.
                            throw new System.ArgumentException("Field must have either String or Reader value");
                        }

                        // Tokenize Field and add to postingTable
                        TokenStream stream = analyzer.TokenStream(fieldName, reader);
                        try
                        {
                            for (Token t = stream.Next(); t != null; t = stream.Next())
                            {
                                // An increment > 1 leaves gaps in the position
                                // sequence; an increment of 0 stacks this token
                                // on the previous token's position.
                                position += (t.GetPositionIncrement() - 1);
                                AddPosition(fieldName, t.TermText(), position++);
                                // Stop once the per-field term limit is hit;
                                // remaining tokens are discarded.
                                if (++length > maxFieldLength)
                                {
                                    break;
                                }
                            }
                        }
                        finally
                        {
                            // Always release the token stream, even on break/throw.
                            stream.Close();
                        }
                    }

                    fieldLengths[fieldNumber]   = length;   // save Field length
                    fieldPositions[fieldNumber] = position; // save Field position
                    fieldBoosts[fieldNumber]   *= field.GetBoost(); // fold in this instance's boost
                }
            }
        }
		/// <summary>Appends the stored fields of a document to the fields
		/// stream, first recording its start offset in the index stream.
		/// Each document is written as: stored-field count, then per stored
		/// field its field number, a flags byte (bit 0 = tokenized), and
		/// its string value.
		/// </summary>
		internal void  AddDocument(Document doc)
		{
			indexStream.WriteLong(fieldsStream.GetFilePointer());

			// Count stored fields up front so the count can be written first.
			int stored = 0;
			foreach (Field f in doc.Fields())
			{
				if (f.IsStored())
				{
					stored++;
				}
			}
			fieldsStream.WriteVInt(stored);

			// Write each stored field: number, flags, value.
			foreach (Field f in doc.Fields())
			{
				if (!f.IsStored())
				{
					continue;
				}
				fieldsStream.WriteVInt(fieldInfos.FieldNumber(f.Name()));

				byte flags = 0;
				if (f.IsTokenized())
				{
					flags |= 1;
				}
				fieldsStream.WriteByte(flags);
				fieldsStream.WriteString(f.StringValue());
			}
		}
Example #5
0
		/// <summary>Registers a FieldInfo entry for each field of the
		/// supplied document (name, indexed flag, term-vector flag).
		/// </summary>
		public void  Add(Document doc)
		{
			foreach (Field f in doc.Fields())
			{
				Add(f.Name(), f.IsIndexed(), f.IsTermVectorStored());
			}
		}
Example #6
0
        /// <summary>Writes a single document as a complete segment: field
        /// names (.fnm), stored field values, inverted postings, and per-field
        /// norms.
        /// </summary>
        /*internal*/ public void  AddDocument(System.String segment, Document doc)
        {
            // Record the document's field names.
            fieldInfos = new FieldInfos();
            fieldInfos.Add(doc);
            fieldInfos.Write(directory, segment + ".fnm");

            // Persist the stored field values.
            FieldsWriter writer = new FieldsWriter(directory, segment, fieldInfos);
            try
            {
                writer.AddDocument(doc);
            }
            finally
            {
                writer.Close();
            }

            // Reset per-document inversion state, sized to the field count.
            postingTable.Clear();
            fieldLengths   = new int[fieldInfos.Size()];
            fieldPositions = new int[fieldInfos.Size()];
            fieldBoosts    = new float[fieldInfos.Size()];

            // Every field boost starts at the document-level boost.
            float docBoost = doc.GetBoost();
            for (int idx = 0; idx < fieldBoosts.Length; idx++)
            {
                fieldBoosts[idx] = docBoost;
            }

            // Invert the document into the posting table.
            InvertDocument(doc);

            // Sort the posting table into an array and write the postings.
            Posting[] sorted = SortPostingTable();
            WritePostings(sorted, segment);

            // Finally, write norms for the indexed fields.
            WriteNorms(doc, segment);
        }
Example #7
0
		/// <summary>Indexes one document into its own segment.  Writes the
		/// field-name table (.fnm), the stored field values, the inverted
		/// postings, and the field norms, in that order.
		/// </summary>
		/*internal*/ public void  AddDocument(System.String segment, Document doc)
		{
			// Field names first: downstream files reference field numbers.
			fieldInfos = new FieldInfos();
			fieldInfos.Add(doc);
			fieldInfos.Write(directory, segment + ".fnm");

			// Stored field values.
			FieldsWriter fw = new FieldsWriter(directory, segment, fieldInfos);
			try
			{
				fw.AddDocument(doc);
			}
			finally
			{
				fw.Close();
			}

			// Fresh inversion state for this document.
			postingTable.Clear();
			int numFields = fieldInfos.Size();
			fieldLengths = new int[numFields];
			fieldPositions = new int[numFields];
			fieldBoosts = new float[numFields];

			// Seed each field boost with the document-level boost.
			float initialBoost = doc.GetBoost();
			for (int f = 0; f < fieldBoosts.Length; f++)
			{
				fieldBoosts[f] = initialBoost;
			}

			InvertDocument(doc);

			// Sort postings and flush them to the segment.
			Posting[] ordered = SortPostingTable();
			WritePostings(ordered, segment);

			// Norms of indexed fields come last.
			WriteNorms(doc, segment);
		}
        /// <summary>Indexes a document using the supplied analyzer instead of
        /// the value of {@link #GetAnalyzer()}.  Any field with more than
        /// {@link #maxFieldLength} terms has the remainder discarded.
        /// </summary>
        public virtual void  AddDocument(Document doc, Analyzer analyzer)
        {
            // Build the single-document segment outside the lock; only the
            // shared segmentInfos bookkeeping needs mutual exclusion.
            DocumentWriter writer = new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength);
            System.String segment = NewSegmentName();
            writer.AddDocument(segment, doc);

            // NOTE(review): locks on 'this', matching the file's existing
            // convention; a private lock object would be safer.
            lock (this)
            {
                segmentInfos.Add(new SegmentInfo(segment, 1, ramDirectory));
                MaybeMergeSegments();
            }
        }
		/// <summary>Loads the stored fields of document <paramref name="n"/>.
		/// Seeks the index stream to entry n (one 8-byte offset per document)
		/// to locate the document in the fields stream, then rebuilds each
		/// stored field; bit 0 of the flags byte marks it tokenized.
		/// </summary>
		public /*internal*/ Document Doc(int n)
		{
			// Each index entry is a single long: the fields-stream offset.
			indexStream.Seek(n * 8L);
			fieldsStream.Seek(indexStream.ReadLong());

			Document result = new Document();
			int fieldCount = fieldsStream.ReadVInt();
			while (fieldCount-- > 0)
			{
				int number = fieldsStream.ReadVInt();
				FieldInfo info = fieldInfos.FieldInfo(number);

				byte flags = fieldsStream.ReadByte();

				result.Add(new Field(info.name, fieldsStream.ReadString(), true, info.isIndexed, (flags & 1) != 0, info.storeTermVector)); // vector
			}
			return result;
		}
Example #10
0
 /// <summary>Writes one norm file ("segment.fN") per indexed field.  The
 /// norm is the field's accumulated boost scaled by the similarity's
 /// length normalization, encoded into a single byte.
 /// </summary>
 private void  WriteNorms(Document doc, System.String segment)
 {
     for (int fieldNum = 0; fieldNum < fieldInfos.Size(); fieldNum++)
     {
         FieldInfo info = fieldInfos.FieldInfo(fieldNum);
         if (!info.isIndexed)
         {
             continue; // only indexed fields carry norms
         }
         float norm = fieldBoosts[fieldNum] * similarity.LengthNorm(info.name, fieldLengths[fieldNum]);
         OutputStream output = directory.CreateFile(segment + ".f" + fieldNum);
         try
         {
             output.WriteByte(Monodoc.Lucene.Net.Search.Similarity.EncodeNorm(norm));
         }
         finally
         {
             output.Close();
         }
     }
 }
Example #11
0
        /// <summary>Reads back document number <paramref name="n"/> from the
        /// stored-fields files.  The index stream holds one 8-byte offset per
        /// document; the fields stream holds the field data at that offset.
        /// </summary>
        public /*internal*/ Document Doc(int n)
        {
            indexStream.Seek(n * 8L);
            long offset = indexStream.ReadLong();
            fieldsStream.Seek(offset);

            Document document = new Document();
            int total = fieldsStream.ReadVInt();

            for (int j = 0; j < total; j++)
            {
                int num = fieldsStream.ReadVInt();
                FieldInfo fieldInfo = fieldInfos.FieldInfo(num);

                // Bit 0 of the flags byte records whether the field was tokenized.
                byte flags = fieldsStream.ReadByte();

                document.Add(new Field(fieldInfo.name, fieldsStream.ReadString(), true, fieldInfo.isIndexed, (flags & 1) != 0, fieldInfo.storeTermVector)); // vector
            }

            return document;
        }
 /// <summary>Adds a document using this writer's default analyzer.  Fields
 /// with more than {@link #maxFieldLength} terms have the remainder
 /// discarded.
 /// </summary>
 public virtual void  AddDocument(Document doc)
 {
     this.AddDocument(doc, this.analyzer);
 }
Example #13
0
		/// <summary>Emits a one-byte norm file ("segment.fN") for every
		/// indexed field: the accumulated field boost multiplied by the
		/// similarity's length normalization, byte-encoded.
		/// </summary>
		private void  WriteNorms(Document doc, System.String segment)
		{
			for (int idx = 0; idx < fieldInfos.Size(); idx++)
			{
				FieldInfo entry = fieldInfos.FieldInfo(idx);
				if (entry.isIndexed)
				{
					float normValue = fieldBoosts[idx] * similarity.LengthNorm(entry.name, fieldLengths[idx]);
					OutputStream normFile = directory.CreateFile(segment + ".f" + idx);
					try
					{
						normFile.WriteByte(Monodoc.Lucene.Net.Search.Similarity.EncodeNorm(normValue));
					}
					finally
					{
						// Close even if the write fails.
						normFile.Close();
					}
				}
			}
		}
Example #14
0
		// Tokenizes the fields of a document into Postings.
		//
		// Indexed, un-tokenized fields are added as a single term; tokenized
		// fields are analyzed and each token added at its computed position.
		// Per-field running totals (length, position, boost) live in
		// fieldLengths / fieldPositions / fieldBoosts, keyed by field number,
		// so repeated fields with the same name accumulate.
		private void  InvertDocument(Document doc)
		{
            foreach(Field field in doc.Fields())
            {
				System.String fieldName = field.Name();
				int fieldNumber = fieldInfos.FieldNumber(fieldName);
				
				int length = fieldLengths[fieldNumber]; // length of Field
				int position = fieldPositions[fieldNumber]; // position in Field
				
				if (field.IsIndexed())
				{
					if (!field.IsTokenized())
					{
						// un-tokenized Field: index the raw string as one term
						AddPosition(fieldName, field.StringValue(), position++);
						length++;
					}
					else
					{
						System.IO.TextReader reader; // find or make Reader
						if (field.ReaderValue() != null)
							reader = field.ReaderValue();
						else if (field.StringValue() != null)
							reader = new System.IO.StringReader(field.StringValue());
						else
							// a tokenized field must carry text in some form
							throw new System.ArgumentException("Field must have either String or Reader value");
						
						// Tokenize Field and add to postingTable
						TokenStream stream = analyzer.TokenStream(fieldName, reader);
						try
						{
							for (Token t = stream.Next(); t != null; t = stream.Next())
							{
								// increment > 1 leaves gaps; 0 stacks this token
								// on the previous token's position
								position += (t.GetPositionIncrement() - 1);
								AddPosition(fieldName, t.TermText(), position++);
								// stop at the per-field term limit; the rest is discarded
								if (++length > maxFieldLength)
									break;
							}
						}
						finally
						{
							// always release the token stream, even on break/throw
							stream.Close();
						}
					}
					
					fieldLengths[fieldNumber] = length; // save Field length
					fieldPositions[fieldNumber] = position; // save Field position
					fieldBoosts[fieldNumber] *= field.GetBoost(); // fold in this instance's boost
				}
			}
		}
		/// <summary>Adds a document to the index, analyzed with the given
		/// analyzer rather than the one from {@link #GetAnalyzer()}.  Fields
		/// exceeding {@link #maxFieldLength} terms are truncated.
		/// </summary>
		public virtual void  AddDocument(Document doc, Analyzer analyzer)
		{
			// Write the document into its own RAM-backed segment.
			DocumentWriter docWriter = new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength);
			System.String name = NewSegmentName();
			docWriter.AddDocument(name, doc);

			// NOTE(review): locks on 'this' per the file's existing style;
			// a dedicated lock object would be preferable.
			lock (this)
			{
				segmentInfos.Add(new SegmentInfo(name, 1, ramDirectory));
				MaybeMergeSegments();
			}
		}
		/// <summary>Indexes a document with this writer's default analyzer.
		/// Any field with more than {@link #maxFieldLength} terms has the
		/// remainder discarded.
		/// </summary>
		public virtual void  AddDocument(Document doc)
		{
			this.AddDocument(doc, this.analyzer);
		}