SetMaxFieldLength() public method

The maximum number of terms that will be indexed for a single field in a document. This limits the amount of memory required for indexing, so that collections with very large files will not crash the indexing process by running out of memory. This setting refers to the number of running terms, not to the number of different terms.

Note: this silently truncates large documents, excluding from the index all terms that occur further in the document. If you know your source documents are large, be sure to set this value high enough to accommodate the expected size. If you set it to Integer.MAX_VALUE, then the only limit is your memory, but you should anticipate an OutOfMemoryError.

By default, no more than DEFAULT_MAX_FIELD_LENGTH terms will be indexed for a field.

public SetMaxFieldLength ( int maxFieldLength ) : void
maxFieldLength int
return void
 /// <summary>
 /// Builds the Lucene objects this repository works with: an analyzer, the
 /// "./index" directory, a writer over that directory and a searcher over
 /// the same directory.
 /// </summary>
 public LuceneMetadataRepository()
 {
     // Per-field term limit handed to the writer below.
     const int maxTermsPerField = 25000;

     _analyzer = new StandardAnalyzer();
     _directory = FSDirectory.GetDirectory("./index", true);

     // The writer is created (and configured) before the searcher; both
     // operate on the same directory instance.
     _indexWriter = new IndexWriter(_directory, _analyzer, true);
     _indexWriter.SetMaxFieldLength(maxTermsPerField);

     _indexSearcher = new IndexSearcher(_directory);
 }
Beispiel #2
0
 /// <summary>
 /// Sets the maximum number of terms that will be indexed for a single field
 /// of a document. Capping this bounds the memory needed for indexing, so
 /// that collections containing very large files do not exhaust memory.
 /// <para>
 /// The cap effectively truncates large documents: terms occurring beyond
 /// the limit are left out of the index. If the source documents are known
 /// to be large, choose a value high enough to accommodate them. With the
 /// largest possible int value the only remaining limit is available memory,
 /// so running out of memory should be anticipated.
 /// </para>
 /// <para>By default, no more than 10,000 terms are indexed for a field.</para>
 /// </summary>
 /// <param name="maxFieldLength">
 /// New per-field term limit. It is stored on this instance and, when a
 /// writer is currently open, forwarded to that writer as well.
 /// </param>
 /// <seealso cref="IndexWriter.SetMaxFieldLength(int)"/>
 /// <throws>  IllegalStateException if the index is closed </throws>
 public virtual void SetMaxFieldLength(int maxFieldLength)
 {
     lock (directory)
     {
         // Fails first if the index is no longer open.
         AssureOpen();

         // Push the new limit to the live writer, if there is one ...
         if (indexWriter != null)
         {
             indexWriter.SetMaxFieldLength(maxFieldLength);
         }

         // ... and remember it on this instance.
         this.maxFieldLength = maxFieldLength;
     }
 }
Beispiel #3
0
        /// <summary>
        /// Rebuilds the index under "./index" from every package returned by
        /// <c>Repository.List()</c>. Each package becomes one document with a
        /// stored, tokenized "name" field and a stored, tokenized
        /// "description" field.
        /// </summary>
        /// <remarks>
        /// The writer is closed in a <c>finally</c> block so that it is
        /// released even when listing the repository or adding a document
        /// throws; previously a failure left the writer open.
        /// </remarks>
        public void Index()
        {
            Directory directory = FSDirectory.GetDirectory("./index", true);
            var iwriter = new IndexWriter(directory, _analyzer, true);
            try
            {
                iwriter.SetMaxFieldLength(25000);

                foreach (var package in Repository.List())
                {
                    var doc = new Document();
                    doc.Add(new Field("name", package.Name, Field.Store.YES, Field.Index.TOKENIZED));
                    doc.Add(new Field("description", package.Description, Field.Store.YES, Field.Index.TOKENIZED));
                    iwriter.AddDocument(doc);
                }
            }
            finally
            {
                // Always release the writer, on success and on failure alike.
                iwriter.Close();
            }
        }
Beispiel #4
0
 /// <summary>
 /// Makes sure an IndexWriter is open. When none exists yet, any open
 /// IndexReader is closed first and a new writer is created on the current
 /// directory and configured from this instance's settings.
 /// </summary>
 /// <throws>  IOException </throws>
 protected internal virtual void CreateIndexWriter()
 {
     // Nothing to do when a writer is already available.
     if (indexWriter != null)
     {
         return;
     }

     // Close the reader before opening the writer.
     if (indexReader != null)
     {
         indexReader.Close();
         indexReader = null;
     }

     indexWriter = new IndexWriter(directory, analyzer, false);

     // Carry the settings held on this instance over to the new writer.
     indexWriter.SetInfoStream(infoStream);
     indexWriter.SetUseCompoundFile(useCompoundFile);
     indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
     indexWriter.SetMaxFieldLength(maxFieldLength);
     indexWriter.SetMergeFactor(mergeFactor);
 }
Beispiel #5
0
 /// <summary>
 /// Makes sure an IndexWriter is open. When none exists yet, any open
 /// IndexReader is closed first and a new writer using a serial merge
 /// scheduler is created and configured from this instance's settings.
 /// </summary>
 /// <throws>  CorruptIndexException if the index is corrupt </throws>
 /// <throws>  LockObtainFailedException if another writer has this index
 /// open (<code>write.lock</code> could not be obtained) </throws>
 /// <throws>  IOException if there is a low-level IO error </throws>
 protected internal virtual void CreateIndexWriter()
 {
     // Nothing to do when a writer is already available.
     if (indexWriter != null)
     {
         return;
     }

     // Close the reader before opening the writer.
     if (indexReader != null)
     {
         indexReader.Close();
         indexReader = null;
     }

     indexWriter = new IndexWriter(directory, analyzer, false);

     // IndexModifier cannot use ConcurrentMergeScheduler
     // because it synchronizes on the directory which can
     // cause deadlock
     indexWriter.SetMergeScheduler(new SerialMergeScheduler());

     indexWriter.SetInfoStream(infoStream);
     indexWriter.SetUseCompoundFile(useCompoundFile);

     // The buffered-docs limit is only applied when it is not the
     // DISABLE_AUTO_FLUSH sentinel.
     if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH)
     {
         indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
     }

     indexWriter.SetMaxFieldLength(maxFieldLength);
     indexWriter.SetMergeFactor(mergeFactor);
 }
		/// <summary>
		/// Indexes a single document whose only field contains the token "a"
		/// 128 * 1024 times and checks that the reader reports one document,
		/// a document frequency of 1 for the term and a term frequency of
		/// 128 * 1024 within that document.
		/// </summary>
		public virtual void TestHighFreqTerm()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetRAMBufferSizeMB(0.01);
			writer.SetMaxFieldLength(100000000);

			// Massive doc: 4096 * 4 chunks of 8 tokens each = 128 K a's
			System.Text.StringBuilder text = new System.Text.StringBuilder(1024 * 1024);
			for (int chunk = 0; chunk < 4096 * 4; chunk++)
			{
				text.Append(" a a a a a a a a");
			}

			Document massiveDoc = new Document();
			massiveDoc.Add(new Field("field", text.ToString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			writer.AddDocument(massiveDoc);
			writer.Close();

			IndexReader reader = IndexReader.Open(dir);
			Assert.AreEqual(1, reader.MaxDoc());
			Assert.AreEqual(1, reader.NumDocs());

			Term term = new Term("field", "a");
			Assert.AreEqual(1, reader.DocFreq(term));

			// Advance to the first (and only) posting before reading its frequency.
			TermDocs postings = reader.TermDocs(term);
			postings.Next();
			Assert.AreEqual(128 * 1024, postings.Freq());

			reader.Close();
			dir.Close();
		}
 /// <summary>
 /// Indexes every directory listed in <paramref name="fileIndex"/> into the
 /// index located at <c>fileIndex.Path</c>, using the tuning values carried
 /// by <paramref name="indexer"/>.
 /// </summary>
 /// <param name="analyzer">Analyzer handed to the IndexWriter.</param>
 /// <param name="fileIndex">Supplies the index path and the base directories to index.</param>
 /// <param name="indexer">Supplies MaxFieldLength, RamBufferSize, MergeFactor and MaxBufferedDocs.</param>
 /// <param name="create">Passed through to the IndexWriter constructor.</param>
 /// <returns><c>true</c> when indexing completed; <c>false</c> when any exception occurred.</returns>
 /// <remarks>
 /// The writer is now closed in a <c>finally</c> block. Previously it was
 /// never closed, on success or on failure.
 /// </remarks>
 public static bool Index(Analyzer analyzer, FileIndexSet fileIndex,IndexerSet indexer, bool create)
 {
     try
     {
         IndexWriter writer = new IndexWriter(fileIndex.Path, analyzer, create);
         try
         {
             writer.SetMaxFieldLength(indexer.MaxFieldLength);
             writer.SetRAMBufferSizeMB(indexer.RamBufferSize);
             writer.SetMergeFactor(indexer.MergeFactor);
             writer.SetMaxBufferedDocs(indexer.MaxBufferedDocs);
             foreach (string dir in fileIndex.BaseDirs)
             {
                 IndexDir(writer, dir);
             }
         }
         finally
         {
             // Release the writer whether or not indexing succeeded.
             writer.Close();
         }
         return true;
     }
     catch (Exception )
     {
         // Failure is reported through the return value, as before.
         return false;
     }
 }
 /// <summary>
 /// Indexes every directory listed in <paramref name="set"/> into the index
 /// located at <c>set.Path</c>, using explicitly supplied tuning values.
 /// </summary>
 /// <param name="analyzer">Analyzer handed to the IndexWriter.</param>
 /// <param name="set">Supplies the index path and the base directories to index.</param>
 /// <param name="maxFieldLength">Forwarded to <c>SetMaxFieldLength</c>.</param>
 /// <param name="ramBufferSize">Forwarded to <c>SetRAMBufferSizeMB</c>.</param>
 /// <param name="mergeFactor">Forwarded to <c>SetMergeFactor</c>.</param>
 /// <param name="maxBufferedDocs">Forwarded to <c>SetMaxBufferedDocs</c>.</param>
 /// <param name="create">Passed through to the IndexWriter constructor.</param>
 /// <returns><c>true</c> when indexing completed; <c>false</c> when any exception occurred.</returns>
 /// <remarks>
 /// The writer is now closed in a <c>finally</c> block. Previously it was
 /// never closed, on success or on failure.
 /// </remarks>
 public static bool Index(Analyzer analyzer, FileIndexSet set, int maxFieldLength, double ramBufferSize, int mergeFactor, int maxBufferedDocs, bool create)
 {
     try
     {
         IndexWriter writer = new IndexWriter(set.Path, analyzer, create);
         try
         {
             writer.SetMaxFieldLength(maxFieldLength);
             writer.SetRAMBufferSizeMB(ramBufferSize);
             writer.SetMergeFactor(mergeFactor);
             writer.SetMaxBufferedDocs(maxBufferedDocs);
             foreach (string dir in set.BaseDirs)
             {
                 IndexDir(writer, dir);
             }
         }
         finally
         {
             // Release the writer whether or not indexing succeeded.
             writer.Close();
         }
         return true;
     }
     catch (Exception )
     {
         // Failure is reported through the return value, as before.
         return false;
     }
 }
 /// <summary>
 /// Indexes the single directory <paramref name="dir"/> into the index
 /// located at <paramref name="savepath"/>, using explicitly supplied
 /// tuning values.
 /// </summary>
 /// <param name="analyzer">Analyzer handed to the IndexWriter.</param>
 /// <param name="savepath">Location of the index, passed to the IndexWriter constructor.</param>
 /// <param name="dir">Directory handed to <c>IndexDir</c>.</param>
 /// <param name="maxFieldLength">Forwarded to <c>SetMaxFieldLength</c>.</param>
 /// <param name="ramBufferSize">Forwarded to <c>SetRAMBufferSizeMB</c>.</param>
 /// <param name="mergeFactor">Forwarded to <c>SetMergeFactor</c>.</param>
 /// <param name="maxBufferedDocs">Forwarded to <c>SetMaxBufferedDocs</c>.</param>
 /// <param name="create">Passed through to the IndexWriter constructor.</param>
 /// <returns><c>true</c> when indexing completed; <c>false</c> when any exception occurred.</returns>
 /// <remarks>
 /// The writer is now closed in a <c>finally</c> block. Previously it was
 /// never closed, on success or on failure.
 /// </remarks>
 public static bool Index(Analyzer analyzer, string savepath,string dir, int maxFieldLength, double ramBufferSize, int mergeFactor, int maxBufferedDocs,bool create)
 {
     try
     {
         IndexWriter writer = new IndexWriter(savepath, analyzer, create);
         try
         {
             writer.SetMaxFieldLength(maxFieldLength);
             writer.SetRAMBufferSizeMB(ramBufferSize);
             writer.SetMergeFactor(mergeFactor);
             writer.SetMaxBufferedDocs(maxBufferedDocs);
             IndexDir(writer, dir);
         }
         finally
         {
             // Release the writer whether or not indexing succeeded.
             writer.Close();
         }
         return true;
     }
     catch (Exception )
     {
         // Failure is reported through the return value, as before.
         return false;
     }
 }
Beispiel #10
0
		/// <summary>
		/// Makes sure an IndexWriter is open. When none exists yet, any open
		/// IndexReader is closed first and a new writer is created on the
		/// current directory and configured from this instance's settings.
		/// </summary>
		/// <throws>  IOException </throws>
		protected internal virtual void CreateIndexWriter()
		{
			// Nothing to do when a writer is already available.
			if (indexWriter != null)
			{
				return;
			}

			// Close the reader before opening the writer.
			if (indexReader != null)
			{
				indexReader.Close();
				indexReader = null;
			}

			indexWriter = new IndexWriter(directory, analyzer, false);

			// Carry the settings held on this instance over to the new writer.
			indexWriter.SetInfoStream(infoStream);
			indexWriter.SetUseCompoundFile(useCompoundFile);
			indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
			indexWriter.SetMaxFieldLength(maxFieldLength);
			indexWriter.SetMergeFactor(mergeFactor);
		}
		/// <summary>
		/// Makes sure an IndexWriter is open. When none exists yet, any open
		/// IndexReader is closed first and a new writer using a serial merge
		/// scheduler is created and configured from this instance's settings.
		/// </summary>
		/// <throws>  CorruptIndexException if the index is corrupt </throws>
		/// <throws>  LockObtainFailedException if another writer has this index
		/// open (<code>write.lock</code> could not be obtained) </throws>
		/// <throws>  IOException if there is a low-level IO error </throws>
		protected internal virtual void CreateIndexWriter()
		{
			// Nothing to do when a writer is already available.
			if (indexWriter != null)
			{
				return;
			}

			// Close the reader before opening the writer.
			if (indexReader != null)
			{
				indexReader.Close();
				indexReader = null;
			}

			indexWriter = new IndexWriter(directory, analyzer, false);

			// IndexModifier cannot use ConcurrentMergeScheduler
			// because it synchronizes on the directory which can
			// cause deadlock
			indexWriter.SetMergeScheduler(new SerialMergeScheduler());

			indexWriter.SetInfoStream(infoStream);
			indexWriter.SetUseCompoundFile(useCompoundFile);

			// The buffered-docs limit is only applied when it is not the
			// DISABLE_AUTO_FLUSH sentinel.
			if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH)
			{
				indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
			}

			indexWriter.SetMaxFieldLength(maxFieldLength);
			indexWriter.SetMergeFactor(mergeFactor);
		}
Beispiel #12
0
 /// <summary>
 /// Indexes every base directory of <c>fileSet</c> into the index at
 /// <c>fileSet.Path</c>, reporting progress after each directory and
 /// signalling completion once the index has been optimized and closed.
 /// </summary>
 /// <param name="create">
 /// When <c>true</c>, the index folder is deleted first; the flag is also
 /// passed through to the IndexWriter constructor.
 /// </param>
 /// <param name="OnIndexCompleted">Invoked once, after the writer has been closed.</param>
 /// <param name="OnProgressChanged">
 /// Handed to <c>FileIndexer.IndexDir</c> and additionally invoked after each
 /// base directory with the total directory count and the number processed so far.
 /// </param>
 /// <returns>Always <c>true</c>; failures surface as exceptions.</returns>
 /// <remarks>
 /// Exceptions propagate to the caller with their original stack trace (the
 /// former <c>catch { throw e; }</c> reset it), and the writer is closed in
 /// a <c>finally</c> block so it is released on failure too.
 /// </remarks>
 public static bool IndexFile(bool create, IndexCompletedEventHandler OnIndexCompleted, IndexProgressChangedEventHandler OnProgressChanged)
 {
     if (create)
     {
         SupportClass.FileUtil.DeleteFolder(fileSet.Path);
     }

     IndexWriter writer = new IndexWriter(fileSet.Path, analyzer, create);
     try
     {
         writer.SetMaxFieldLength(indexerSet.MaxFieldLength);
         writer.SetRAMBufferSizeMB(indexerSet.RamBufferSize);
         writer.SetMergeFactor(indexerSet.MergeFactor);
         writer.SetMaxBufferedDocs(indexerSet.MaxBufferedDocs);

         int i = 0;
         foreach (string dir in fileSet.BaseDirs)
         {
             i++;
             FileIndexer.IndexDir(writer, dir, OnProgressChanged);
             OnProgressChanged("IndexUtil", new IndexProgressChangedEventArgs(fileSet.BaseDirs.Count, i));
         }

         writer.Optimize();
     }
     finally
     {
         // Release the writer whether or not indexing succeeded.
         writer.Close();
     }

     OnIndexCompleted("IndexUtil", new IndexCompletedEventArgs("File"));
     return true;
 }
Beispiel #13
0
 /// <summary>
 /// Indexes every base directory of <c>fileSet</c> into the index at
 /// <c>fileSet.Path</c>, then optimizes and closes the index.
 /// </summary>
 /// <param name="create">
 /// When <c>true</c>, the index folder is deleted first; the flag is also
 /// passed through to the IndexWriter constructor.
 /// </param>
 /// <returns>
 /// Always <c>true</c>, including when <c>fileSet.Path</c> is null or empty
 /// (in which case nothing is indexed); failures surface as exceptions.
 /// </returns>
 /// <remarks>
 /// Exceptions propagate to the caller with their original stack trace (the
 /// former <c>catch { throw e; }</c> reset it), and the writer is closed in
 /// a <c>finally</c> block so it is released on failure too.
 /// </remarks>
 public static bool IndexFile(bool create)
 {
     // No index location configured: nothing to do.
     if (string.IsNullOrEmpty(fileSet.Path))
     {
         return true;
     }

     if (create)
     {
         SupportClass.FileUtil.DeleteFolder(fileSet.Path);
     }

     IndexWriter writer = new IndexWriter(fileSet.Path, analyzer, create);
     try
     {
         writer.SetMaxFieldLength(indexerSet.MaxFieldLength);
         writer.SetRAMBufferSizeMB(indexerSet.RamBufferSize);
         writer.SetMergeFactor(indexerSet.MergeFactor);
         writer.SetMaxBufferedDocs(indexerSet.MaxBufferedDocs);

         foreach (string dir in fileSet.BaseDirs)
         {
             FileIndexer.IndexDir(writer, dir);
         }

         writer.Optimize();
     }
     finally
     {
         // Release the writer whether or not indexing succeeded.
         writer.Close();
     }

     return true;
 }