Пример #1
0
        /// <summary>
        /// Adds one document to the given segment: writes the field names, then
        /// the field values, inverts the document into the posting table, and
        /// finally writes the sorted postings and the norms of the indexed fields.
        /// </summary>
        /// <param name="segment">Segment name; used as the prefix of the files written here.</param>
        /// <param name="doc">The document to add.</param>
        /*internal*/ public void  AddDocument(System.String segment, Document doc)
        {
            // Field names are written to "<segment>.fnm".
            fieldInfos = new FieldInfos();
            fieldInfos.Add(doc);
            fieldInfos.Write(directory, segment + ".fnm");

            // Field values: the writer is closed even if adding the document throws.
            FieldsWriter valuesWriter = new FieldsWriter(directory, segment, fieldInfos);
            try
            {
                valuesWriter.AddDocument(doc);
            }
            finally
            {
                valuesWriter.Close();
            }

            // Reset the per-document inversion state, one slot per known field.
            postingTable.Clear();
            int fieldCount = fieldInfos.Size();
            fieldLengths   = new int[fieldCount];
            fieldPositions = new int[fieldCount];
            fieldBoosts    = new float[fieldCount];

            // Every field boost starts out as the document boost.
            float docBoost = doc.GetBoost();
            for (int slot = 0; slot < fieldBoosts.Length; slot++)
            {
                fieldBoosts[slot] = docBoost;
            }

            // Invert the document into postingTable.
            InvertDocument(doc);

            // Sort the posting table into an array and write it out.
            WritePostings(SortPostingTable(), segment);

            // Write the norms of the indexed fields.
            WriteNorms(doc, segment);
        }
Пример #2
0
		/// <summary>
		/// Adds one document to the given segment. In order: field names are
		/// written to "&lt;segment&gt;.fnm", field values are written through a
		/// FieldsWriter, the document is inverted into the posting table, and the
		/// sorted postings and the norms of the indexed fields are written.
		/// </summary>
		/// <param name="segment">Segment name; used as the prefix of the files written here.</param>
		/// <param name="doc">The document to add.</param>
		/*internal*/ public void  AddDocument(System.String segment, Document doc)
		{
			// Step 1: field names.
			fieldInfos = new FieldInfos();
			fieldInfos.Add(doc);
			fieldInfos.Write(directory, segment + ".fnm");

			// Step 2: field values. Close() runs whether or not AddDocument succeeds.
			FieldsWriter storedWriter = new FieldsWriter(directory, segment, fieldInfos);
			try
			{
				storedWriter.AddDocument(doc);
			}
			finally
			{
				storedWriter.Close();
			}

			// Step 3: fresh inversion state, sized by the number of fields.
			postingTable.Clear();
			int numFields = fieldInfos.Size();
			fieldLengths = new int[numFields];
			fieldPositions = new int[numFields];
			fieldBoosts = new float[numFields];

			// Seed each field boost with the boost of the whole document.
			float documentBoost = doc.GetBoost();
			int k = 0;
			while (k < fieldBoosts.Length)
			{
				fieldBoosts[k] = documentBoost;
				k++;
			}

			// Step 4: invert the document into postingTable.
			InvertDocument(doc);

			// Step 5: sort the posting table into an array, then write the postings.
			Posting[] ordered = SortPostingTable();
			WritePostings(ordered, segment);

			// Step 6: norms of the indexed fields.
			WriteNorms(doc, segment);
		}
        /// <summary>
        /// Collects the names of the segment's files, adds them to a compound
        /// file named "&lt;segment&gt;.cfs", and then deletes the individual
        /// source files from the directory.
        /// </summary>
        private void  CreateCompoundFile()
        {
            CompoundFileWriter compoundWriter = new CompoundFileWriter(directory, segment + ".cfs");

            System.Collections.ArrayList sourceFiles = new System.Collections.ArrayList(COMPOUND_EXTENSIONS.Length + fieldInfos.Size());

            // Basic files: one per compound extension.
            foreach (System.String extension in COMPOUND_EXTENSIONS)
            {
                sourceFiles.Add(segment + "." + extension);
            }

            // Field norm files: only indexed fields have one, named by field number.
            for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
            {
                if (!fieldInfos.FieldInfo(fieldNumber).isIndexed)
                {
                    continue;
                }
                sourceFiles.Add(segment + ".f" + fieldNumber);
            }

            // Vector files: only when the field infos report term vectors.
            if (fieldInfos.HasVectors())
            {
                foreach (System.String extension in VECTOR_EXTENSIONS)
                {
                    sourceFiles.Add(segment + "." + extension);
                }
            }

            // Register every collected file with the compound writer.
            foreach (System.String fileName in sourceFiles)
            {
                compoundWriter.AddFile(fileName);
            }

            // Closing the writer performs the merge.
            compoundWriter.Close();

            // The sources are deleted only after the merge above has completed.
            foreach (System.String fileName in sourceFiles)
            {
                directory.DeleteFile(fileName);
            }
        }
Пример #4
0
		/// <summary>Create term vectors writer for the specified segment in specified
		/// directory.  A new TermVectorsWriter should be created for each
		/// segment. Three files are created, named <c>segment + TVX_EXTENSION</c>,
		/// <c>segment + TVD_EXTENSION</c> and <c>segment + TVF_EXTENSION</c>, and
		/// <c>FORMAT_VERSION</c> is written at the start of each of them.
		/// If construction fails part-way, every stream opened so far is closed
		/// (best effort) before the original exception is rethrown.
		/// </summary>
		/// <param name="directory">Directory in which the term vector files are created.</param>
		/// <param name="segment">Segment name used as the prefix of the file names.</param>
		/// <param name="fieldInfos">Field infos kept by the writer; its size is used as
		/// the initial capacity of the field list.</param>
		public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
		{
			try
			{
				// Open files for TermVector storage
				tvx = directory.CreateFile(segment + TVX_EXTENSION);
				tvx.WriteInt(FORMAT_VERSION);
				tvd = directory.CreateFile(segment + TVD_EXTENSION);
				tvd.WriteInt(FORMAT_VERSION);
				tvf = directory.CreateFile(segment + TVF_EXTENSION);
				tvf.WriteInt(FORMAT_VERSION);
				
				this.fieldInfos = fieldInfos;
				fields = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(fieldInfos.Size()));
				terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			}
			catch
			{
				// When a constructor throws, the caller never receives the instance and
				// therefore cannot close it; release whatever was opened here instead.
				// A failure while closing is ignored on purpose so that it cannot
				// replace the original exception rethrown below.
				try
				{
					if (tvx != null)
					{
						tvx.Close();
					}
				}
				catch (System.Exception)
				{
				}
				try
				{
					if (tvd != null)
					{
						tvd.Close();
					}
				}
				catch (System.Exception)
				{
				}
				try
				{
					if (tvf != null)
					{
						tvf.Close();
					}
				}
				catch (System.Exception)
				{
				}
				throw;
			}
		}
        /// <summary>
        /// Builds the list of file names associated with this segment.
        /// </summary>
        /// <returns>
        /// A synchronized list holding "&lt;segment&gt;.&lt;ext&gt;" for every known
        /// extension whose file exists in the directory, followed by
        /// "&lt;segment&gt;.f&lt;n&gt;" for every indexed field number n. The norm
        /// file names are added without an existence check.
        /// </returns>
        internal System.Collections.ArrayList Files()
        {
            System.Collections.ArrayList result = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
            System.String[] knownExtensions = { "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp" };

            // Fixed-extension files are listed only when they are present.
            foreach (System.String extension in knownExtensions)
            {
                System.String candidate = segment + "." + extension;
                if (!Directory().FileExists(candidate))
                {
                    continue;
                }
                result.Add(candidate);
            }

            // One norm file name per indexed field, keyed by field number.
            for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
            {
                if (fieldInfos.FieldInfo(fieldNumber).isIndexed)
                {
                    result.Add(segment + ".f" + fieldNumber);
                }
            }

            return result;
        }
Пример #6
0
 /// <summary>
 /// Writes one norm byte for every indexed field into its own file named
 /// "&lt;segment&gt;.f&lt;n&gt;", where n is the field number. The norm is the
 /// field boost multiplied by the similarity's length norm for the field.
 /// </summary>
 /// <param name="doc">The document being added; not read by this method.</param>
 /// <param name="segment">Segment name used as the prefix of the norm files.</param>
 private void  WriteNorms(Document doc, System.String segment)
 {
     for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
     {
         FieldInfo info = fieldInfos.FieldInfo(fieldNumber);
         if (!info.isIndexed)
         {
             // Only indexed fields get a norm file.
             continue;
         }

         // The norm is computed before the file is created.
         float lengthNorm = similarity.LengthNorm(info.name, fieldLengths[fieldNumber]);
         float norm = fieldBoosts[fieldNumber] * lengthNorm;

         OutputStream normStream = directory.CreateFile(segment + ".f" + fieldNumber);
         try
         {
             normStream.WriteByte(Monodoc.Lucene.Net.Search.Similarity.EncodeNorm(norm));
         }
         finally
         {
             // Close the stream whether or not the write succeeded.
             normStream.Close();
         }
     }
 }
        /// <summary>Create term vectors writer for the specified segment in specified
        /// directory.  A new TermVectorsWriter should be created for each
        /// segment. Three files are created, named <c>segment + TVX_EXTENSION</c>,
        /// <c>segment + TVD_EXTENSION</c> and <c>segment + TVF_EXTENSION</c>, and
        /// <c>FORMAT_VERSION</c> is written at the start of each of them.
        /// If construction fails part-way, every stream opened so far is closed
        /// (best effort) before the original exception is rethrown.
        /// </summary>
        /// <param name="directory">Directory in which the term vector files are created.</param>
        /// <param name="segment">Segment name used as the prefix of the file names.</param>
        /// <param name="fieldInfos">Field infos kept by the writer; its size is used as
        /// the initial capacity of the field list.</param>
        public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
        {
            try
            {
                // Open files for TermVector storage
                tvx = directory.CreateFile(segment + TVX_EXTENSION);
                tvx.WriteInt(FORMAT_VERSION);
                tvd = directory.CreateFile(segment + TVD_EXTENSION);
                tvd.WriteInt(FORMAT_VERSION);
                tvf = directory.CreateFile(segment + TVF_EXTENSION);
                tvf.WriteInt(FORMAT_VERSION);

                this.fieldInfos = fieldInfos;
                fields          = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(fieldInfos.Size()));
                terms           = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
            }
            catch
            {
                // When a constructor throws, the caller never receives the instance and
                // therefore cannot close it; release whatever was opened here instead.
                // A failure while closing is ignored on purpose so that it cannot
                // replace the original exception rethrown below.
                try
                {
                    if (tvx != null)
                    {
                        tvx.Close();
                    }
                }
                catch (System.Exception)
                {
                }
                try
                {
                    if (tvd != null)
                    {
                        tvd.Close();
                    }
                }
                catch (System.Exception)
                {
                }
                try
                {
                    if (tvf != null)
                    {
                        tvf.Close();
                    }
                }
                catch (System.Exception)
                {
                }
                throw;
            }
        }