SetRAMBufferSizeMB() public method

Determines the amount of RAM that may be used for buffering added documents and deletions before they are flushed to the Directory. Generally, for faster indexing performance, it is best to flush by RAM usage instead of document count, and to use as large a RAM buffer as you can.

When this is set, the writer will flush whenever buffered documents and deletions use this much RAM. Pass in DISABLE_AUTO_FLUSH to prevent triggering a flush due to RAM usage. Note that if flushing by document count is also enabled, then the flush will be triggered by whichever comes first.
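For illustration, a minimal sketch (assuming the Lucene.Net 2.9-style setters used in the examples below) that flushes by RAM usage alone, with the document-count trigger disabled:

    // Sketch, not from the original docs: flush by RAM usage only.
    var dir = new RAMDirectory();
    var writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.SetRAMBufferSizeMB(48.0);                           // flush once roughly 48 MB is buffered
    writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); // no doc-count trigger; RAM usage decides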

NOTE: the accounting of RAM usage for pending deletions is only approximate. Specifically, if you delete by Query, Lucene currently has no way to measure the RAM usage of individual Queries, so the accounting will under-estimate; you should compensate by either calling Commit() periodically yourself, or by using SetMaxBufferedDeleteTerms to flush by count instead of RAM usage (each buffered delete Query counts as one).
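A sketch of that workaround (the ids list and the "id" field name are assumptions, not from the original docs):

    // Sketch: delete-by-Query RAM usage is under-estimated, so commit periodically.
    int pending = 0;
    foreach (string id in ids) // 'ids' is a hypothetical list of keys to delete
    {
        writer.DeleteDocuments(new TermQuery(new Term("id", id)));
        if (++pending % 1000 == 0)
            writer.Commit(); // bound the number of buffered delete Queries
    }
    // Alternatively, flush deletes by count (each buffered delete Query counts as one):
    writer.SetMaxBufferedDeleteTerms(1000);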

NOTE: because IndexWriter uses ints when managing its internal storage, the absolute maximum value for this setting is somewhat less than 2048 MB. The precise limit depends on various factors, such as how large your documents are, how many fields have norms, etc., so it's best to set this value comfortably under 2048.
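One way to respect that ceiling is to clamp whatever value comes from configuration, as in this sketch (configuredMb is a hypothetical setting):

    // Sketch: stay comfortably under the ~2048 MB internal limit.
    double ramBufferMb = Math.Min(configuredMb, 1024.0); // 'configuredMb' is hypothetical
    writer.SetRAMBufferSizeMB(ramBufferMb);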

The default value is DEFAULT_RAM_BUFFER_SIZE_MB.

Throws ArgumentException if ramBufferSize is enabled but non-positive, or if it disables ramBufferSize when maxBufferedDocs is already disabled.
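A sketch of the rejected combination described above (both flush triggers disabled):

    // Sketch: with doc-count flushing already disabled, disabling the RAM
    // trigger as well should be rejected per the note above.
    writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
    writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); // throws ArgumentException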

public SetRAMBufferSizeMB ( double mb ) : void
mb double The buffer size in megabytes.
Returns void
Example #1
        public void ApplyToWriter(IndexWriter writer)
        {
            try
            {
                if (MergeFactor != null)
                {
                    writer.SetMergeFactor((int) MergeFactor);
                }

                if (MaxMergeDocs != null)
                {
                    writer.SetMaxMergeDocs((int) MaxMergeDocs);
                }

                if (MaxBufferedDocs != null)
                {
                    writer.SetMaxBufferedDocs((int) MaxBufferedDocs);
                }

                if (RamBufferSizeMb != null)
                {
                    writer.SetRAMBufferSizeMB((double) RamBufferSizeMb); // takes a double; an (int) cast would truncate
                }

                if (TermIndexInterval != null)
                {
                    writer.SetTermIndexInterval((int) TermIndexInterval);
                }
            }
            catch (ArgumentOutOfRangeException)
            {
                // TODO: Log it
            }
        }
Example #2
        public static IndexWriter GetAzureIndexWriter(this LuceneIndexer indexer)
        {
            indexer.EnsureIndex(false);
            var writer = new IndexWriter(indexer.GetLuceneDirectory(), indexer.IndexingAnalyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.SetRAMBufferSizeMB(10.0);
            writer.SetUseCompoundFile(false);
            writer.SetMaxMergeDocs(10000);
            writer.SetMergeFactor(100);
            return writer;
        }
Example #3
        static void Main(string[] args)
        {
            
            // default AzureDirectory stores cache in local temp folder
            var azureDirectory = new AzureDirectory(CloudStorageAccount.Parse(ConfigurationManager.AppSettings["blobStorage"]), "TestCatalog6");
            var findexExists = IndexReader.IndexExists(azureDirectory);

            IndexWriter indexWriter = null;
            while (indexWriter == null)
            {
                try
                {
                    indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), !IndexReader.IndexExists(azureDirectory), new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
                }
                catch (LockObtainFailedException)
                {
                    Console.WriteLine("Lock is taken, Hit 'Y' to clear the lock, or anything else to try again");
                    if (Console.ReadLine().ToLower().Trim() == "y" )
                        azureDirectory.ClearLock("write.lock");
                }
            };
            Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
            indexWriter.SetRAMBufferSizeMB(10.0);
            //indexWriter.SetUseCompoundFile(false);
            //indexWriter.SetMaxMergeDocs(10000);
            //indexWriter.SetMergeFactor(100);
            
            for (int iDoc = 0; iDoc < 10000; iDoc++)
            {
                if (iDoc % 10 == 0)
                    Console.WriteLine(iDoc);
                var doc = new Document();
                doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                indexWriter.AddDocument(doc);
            }
            Console.WriteLine("Total docs is {0}", indexWriter.NumDocs());
            indexWriter.Dispose();

            IndexSearcher searcher;
            using (new AutoStopWatch("Creating searcher"))
            {
                searcher = new IndexSearcher(azureDirectory); 
            }
            SearchForPhrase(searcher, "dog");
            SearchForPhrase(searcher, _random.Next(32768).ToString());
            SearchForPhrase(searcher, _random.Next(32768).ToString());
            Console.Read();
        }
Example #4
        public void TestReadAndWrite()
        {
            var connectionString = Environment.GetEnvironmentVariable("DataConnectionString") ?? "UseDevelopmentStorage=true";

            var cloudStorageAccount = CloudStorageAccount.Parse(connectionString);

            // default AzureDirectory stores cache in local temp folder
            var azureDirectory = new AzureDirectory(cloudStorageAccount, "testcatalog");

            using (var indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), !IndexReader.IndexExists(azureDirectory), new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH)))
            {
                indexWriter.SetRAMBufferSizeMB(10.0);

                for (int iDoc = 0; iDoc < 10000; iDoc++)
                {
                    var doc = new Document();
                    doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString() + "-" + iDoc.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                    doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                    doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                    indexWriter.AddDocument(doc);
                }

                Console.WriteLine("Total docs is {0}", indexWriter.NumDocs());
            }

            using (var searcher = new IndexSearcher(azureDirectory))
            {
                Assert.AreNotEqual(0, SearchForPhrase(searcher, "dog"));
                Assert.AreNotEqual(0, SearchForPhrase(searcher, "cat"));
                Assert.AreNotEqual(0, SearchForPhrase(searcher, "car"));
            }

            // check the container exists, and delete it
            var blobClient = cloudStorageAccount.CreateCloudBlobClient();
            var container = blobClient.GetContainerReference("testcatalog");
            Assert.IsTrue(container.Exists()); // check the container exists
            container.Delete();
        }
Example #5
        public static void CreateIndexFromDb()
        {
            using (var luceneDirectory = LuceneDirectory)
            {
                var analyzer = new StandardAnalyzer(Version);
                using (var writer = new IndexWriter(luceneDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    writer.SetRAMBufferSizeMB(20);

                    // add data to lucene search index (replaces older entries if any)
                    using (var db = new SQLDataAccess())
                    {
                        db.cmd.CommandText = "select Product.productId, Product.ArtNo, Name, " +
                        "(select ' ' + Offer.Artno from Catalog.Offer where Offer.ProductID=Product.productid FOR XML path('')) as OfferArtno " +
                        "from Catalog.Product where Product.Enabled=1 and Product.CategoryEnabled=1";
                        db.cmd.CommandType = CommandType.Text;
                        db.cnOpen();
                        using (var reader = db.cmd.ExecuteReader())
                        {
                            while (reader.Read())
                            {
                                Thread.Sleep(0);
                                AddToLuceneIndex(
                                    new SampleData(SQLDataHelper.GetInt(reader, "productId"),
                                                   SQLDataHelper.GetString(reader, "ArtNo") + " " + SQLDataHelper.GetString(reader, "OfferArtno"),
                                                   SQLDataHelper.GetString(reader["Name"])), writer);
                            }
                        }
                        db.cnClose();
                    }
                    // close handles
                    analyzer.Close();
                    writer.Optimize();
                }
            }
        }
Example #6
 public static bool Index(Analyzer analyzer, FileIndexSet fileIndex,IndexerSet indexer, bool create)
 {
     try
     {
         IndexWriter writer = new IndexWriter(fileIndex.Path, analyzer, create);
         writer.SetMaxFieldLength(indexer.MaxFieldLength);
         writer.SetRAMBufferSizeMB(indexer.RamBufferSize);
         writer.SetMergeFactor(indexer.MergeFactor);
         writer.SetMaxBufferedDocs(indexer.MaxBufferedDocs);
         foreach (string dir in fileIndex.BaseDirs)
         {
             IndexDir(writer, dir);
         }
         return true;
     }
     catch (Exception )
     {
         return false;
     }
 }
Example #7
		private void CreateIndexWriter()
		{
			indexWriter = new IndexWriter(directory, analyzer, indexDeletionPolicy, maxFieldLength);
            if(_indexReaderWarmer!=null)
            {
                indexWriter.MergedSegmentWarmer = _indexReaderWarmer;
            }
			using (indexWriter.MergeScheduler) { }
			indexWriter.SetMergeScheduler(new ErrorLoggingConcurrentMergeScheduler());

			// RavenDB already manages the memory for those, no need for Lucene to do this as well
			indexWriter.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
			indexWriter.SetRAMBufferSizeMB(1024);

			currentNumberOfWrites = 0;
		}
Example #8
		public virtual void  TestOptimizeOverMerge()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer());
			writer.SetMaxBufferedDocs(2);
			writer.SetMergeFactor(100);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			
			Document document = new Document();
			Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
			document.Add(storedField);
			Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
			document.Add(termVectorField);
			for (int i = 0; i < 170; i++)
				writer.AddDocument(document);
			
			writer.Close();
			MyIndexWriter myWriter = new MyIndexWriter(this, dir);
			myWriter.Optimize();
			Assert.AreEqual(10, myWriter.mergeCount);
		}
Example #9
        public void optimizeIndex(string azureContainerName)
        {
            XmlDocument xdoc = new XmlDocument();
              xdoc.LoadXml(new Utility().getConfigXML());
              XmlNode xNode = xdoc.SelectSingleNode(string.Format("//blobdata[@name='default']"));

              string azureAccount = xNode.Attributes["account"].Value;
              string azureEndpoint = xNode.Attributes["endpoint"].Value;
              string azureSharedKey = xNode.Attributes["accountSharedKey"].Value;
              string blobStorage = xNode.Attributes["endpoint"].Value;

              xNode = xdoc.SelectSingleNode(string.Format("//fragmentData/Setting[@name='HandlerFragments']"));
              string fragmentLocation = xNode.Attributes["value"].Value;

              Microsoft.WindowsAzure.StorageCredentialsAccountAndKey scaak = new Microsoft.WindowsAzure.StorageCredentialsAccountAndKey(azureAccount, azureSharedKey);
              Microsoft.WindowsAzure.CloudStorageAccount csa = new Microsoft.WindowsAzure.CloudStorageAccount(scaak, false);
              AzureDirectory azureDirectory = new AzureDirectory(csa, azureContainerName, new RAMDirectory());
              bool findexExists = false;
              try
              {
            findexExists = IndexReader.IndexExists(azureDirectory);
            if ((findexExists) && IndexWriter.IsLocked(azureDirectory))
              azureDirectory.ClearLock("write.lock");
              }
              catch (Exception e)
              {
            Trace.WriteLine(e.ToString());
            return;
              }

              IndexWriter idxW = new IndexWriter(azureDirectory, new SnowballAnalyzer("English"), !findexExists, new IndexWriter.MaxFieldLength(1024));
              idxW.SetRAMBufferSizeMB(10.0);
              idxW.SetUseCompoundFile(false);
              idxW.SetMaxMergeDocs(10000);
              idxW.SetMergeFactor(100);
              idxW.Optimize();
        }
Example #10
		public virtual void  TestDiverseDocs()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetRAMBufferSizeMB(0.5);
			System.Random rand = new System.Random((System.Int32) 31415);
			for (int i = 0; i < 3; i++)
			{
				// First, docs where every term is unique (heavy on
				// Posting instances)
				for (int j = 0; j < 100; j++)
				{
					Document doc = new Document();
					for (int k = 0; k < 100; k++)
					{
						doc.Add(new Field("field", System.Convert.ToString(rand.Next()), Field.Store.YES, Field.Index.TOKENIZED));
					}
					writer.AddDocument(doc);
				}
				
				// Next, many single term docs where only one term
				// occurs (heavy on byte blocks)
				for (int j = 0; j < 100; j++)
				{
					Document doc = new Document();
					doc.Add(new Field("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(doc);
				}
				
				// Next, many single term docs where only one term
				// occurs but the terms are very long (heavy on
				// char[] arrays)
				for (int j = 0; j < 100; j++)
				{
					System.Text.StringBuilder b = new System.Text.StringBuilder();
					System.String x = System.Convert.ToString(j) + ".";
					for (int k = 0; k < 1000; k++)
						b.Append(x);
					System.String longTerm = b.ToString();
					
					Document doc = new Document();
					doc.Add(new Field("field", longTerm, Field.Store.YES, Field.Index.TOKENIZED));
					writer.AddDocument(doc);
				}
			}
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(dir);
			Hits hits = searcher.Search(new TermQuery(new Term("field", "aaa")));
			Assert.AreEqual(300, hits.Length());
			searcher.Close();
			
			dir.Close();
		}
Example #11
		public virtual void  TestTermVectorCorruption2()
		{
			Directory dir = new MockRAMDirectory();
			for (int iter = 0; iter < 4; iter++)
			{
				bool autoCommit = 1 == iter / 2;
				IndexWriter writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer());
				writer.SetMaxBufferedDocs(2);
				writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
				writer.SetMergeScheduler(new SerialMergeScheduler());
				writer.SetMergePolicy(new LogDocMergePolicy());
				
				Document document = new Document();
				
				Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
				document.Add(storedField);
				writer.AddDocument(document);
				writer.AddDocument(document);
				
				document = new Document();
				document.Add(storedField);
				Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
				document.Add(termVectorField);
				writer.AddDocument(document);
				writer.Optimize();
				writer.Close();
				
				IndexReader reader = IndexReader.Open(dir);
				Assert.IsTrue(reader.GetTermFreqVectors(0) == null);
				Assert.IsTrue(reader.GetTermFreqVectors(1) == null);
				Assert.IsTrue(reader.GetTermFreqVectors(2) != null);
				reader.Close();
			}
			dir.Close();
		}
Example #12
		public virtual void  TestExpungeDeletes3()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMaxBufferedDocs(2);
			writer.SetMergeFactor(50);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			
			Document document = new Document();
			Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
			document.Add(storedField);
			Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
			document.Add(termVectorField);
			for (int i = 0; i < 98; i++)
				writer.AddDocument(document);
			writer.Close();
			
			IndexReader ir = IndexReader.Open(dir);
			Assert.AreEqual(98, ir.MaxDoc());
			Assert.AreEqual(98, ir.NumDocs());
			for (int i = 0; i < 98; i += 2)
				ir.DeleteDocument(i);
			Assert.AreEqual(49, ir.NumDocs());
			ir.Close();
			
			writer = new IndexWriter(dir, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			// Force many merges to happen
			writer.SetMergeFactor(3);
			writer.ExpungeDeletes(false);
			writer.Close();
			ir = IndexReader.Open(dir);
			Assert.AreEqual(49, ir.MaxDoc());
			Assert.AreEqual(49, ir.NumDocs());
			ir.Close();
			dir.Close();
		}
Example #13
        static void Main(string[] args)
        {
            // get settings from azure settings or app.config
            CloudStorageAccount.SetConfigurationSettingPublisher((configName, configSetter) =>
            {
                try
                {
                    configSetter(RoleEnvironment.GetConfigurationSettingValue(configName));
                }
                catch (Exception)
                {
                    // for a console app, reading from App.config
                    configSetter(System.Configuration.ConfigurationManager.AppSettings[configName]);
                }
            });

            // default AzureDirectory stores cache in local temp folder
            AzureDirectory azureDirectory = new AzureDirectory(CloudStorageAccount.FromConfigurationSetting("blobStorage"), "TestCatalog");
            bool findexExists = false;
            try
            {
                findexExists = IndexReader.IndexExists(azureDirectory);
                if ((findexExists) && IndexReader.IsLocked(azureDirectory))
                    azureDirectory.ClearLock("write.lock");
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
                return;
            }
            IndexWriter indexWriter = new IndexWriter(azureDirectory, new Lucene.Net.Analysis.Standard.StandardAnalyzer(), !findexExists);
            indexWriter.SetRAMBufferSizeMB(10.0);
            indexWriter.SetUseCompoundFile(false);
            indexWriter.SetMaxMergeDocs(10000);
            indexWriter.SetMergeFactor(100);
            fExit = true;
            for (int iDoc = 0; iDoc < 100; iDoc++)
            {
                if (fExit)
                    break;
                if (iDoc % 10 == 0)
                    Console.WriteLine(iDoc);
                Document doc = new Document();
                doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                indexWriter.AddDocument(doc);
            }
            Console.WriteLine("Total docs is {0}", indexWriter.DocCount());
            indexWriter.Close();

            IndexSearcher searcher;
            using (new AutoStopWatch("Creating searcher"))
            {
                searcher = new IndexSearcher(azureDirectory); // IndexReader.Open(
            }
            SearchForPhrase(searcher, "dog");
            SearchForPhrase(searcher, _random.Next(32768).ToString());
            SearchForPhrase(searcher, _random.Next(32768).ToString());
        }
Example #14
		public virtual void  TestChangingRAMBuffer2()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMaxBufferedDocs(10);
			writer.SetMaxBufferedDeleteTerms(10);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			
			for (int j = 1; j < 52; j++)
			{
				Document doc = new Document();
				doc.Add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED));
				writer.AddDocument(doc);
			}
			
			int lastFlushCount = - 1;
			for (int j = 1; j < 52; j++)
			{
				writer.DeleteDocuments(new Term("field", "aaa" + j));
				_TestUtil.SyncConcurrentMerges(writer);
				int flushCount = writer.GetFlushCount();
				if (j == 1)
					lastFlushCount = flushCount;
				else if (j < 10)
				{
					// No new files should be created
					Assert.AreEqual(flushCount, lastFlushCount);
				}
				else if (10 == j)
				{
					Assert.IsTrue(flushCount > lastFlushCount);
					lastFlushCount = flushCount;
					writer.SetRAMBufferSizeMB(0.000001);
					writer.SetMaxBufferedDeleteTerms(1);
				}
				else if (j < 20)
				{
					Assert.IsTrue(flushCount > lastFlushCount);
					lastFlushCount = flushCount;
				}
				else if (20 == j)
				{
					writer.SetRAMBufferSizeMB(16);
					writer.SetMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
					lastFlushCount = flushCount;
				}
				else if (j < 30)
				{
					Assert.AreEqual(flushCount, lastFlushCount);
				}
				else if (30 == j)
				{
					writer.SetRAMBufferSizeMB(0.000001);
					writer.SetMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
					writer.SetMaxBufferedDeleteTerms(1);
				}
				else if (j < 40)
				{
					Assert.IsTrue(flushCount > lastFlushCount);
					lastFlushCount = flushCount;
				}
				else if (40 == j)
				{
					writer.SetMaxBufferedDeleteTerms(10);
					writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
					lastFlushCount = flushCount;
				}
				else if (j < 50)
				{
					Assert.AreEqual(flushCount, lastFlushCount);
					writer.SetMaxBufferedDeleteTerms(10);
					writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
				}
				else if (50 == j)
				{
					Assert.IsTrue(flushCount > lastFlushCount);
				}
			}
			writer.Close();
			dir.Close();
		}
Example #15
		public virtual void  TestTermVectorCorruption()
		{
			
			Directory dir = new MockRAMDirectory();
			for (int iter = 0; iter < 4; iter++)
			{
				bool autoCommit = 1 == iter / 2;
				IndexWriter writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer());
				writer.SetMaxBufferedDocs(2);
				writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
				writer.SetMergeScheduler(new SerialMergeScheduler());
				writer.SetMergePolicy(new LogDocMergePolicy(writer));
				
				Document document = new Document();
				
				Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
				document.Add(storedField);
				writer.AddDocument(document);
				writer.AddDocument(document);
				
				document = new Document();
				document.Add(storedField);
				Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
				
				document.Add(termVectorField);
				writer.AddDocument(document);
				writer.Optimize();
				writer.Close();
				
				IndexReader reader = IndexReader.Open(dir);
				for (int i = 0; i < reader.NumDocs(); i++)
				{
					reader.Document(i);
					reader.GetTermFreqVectors(i);
				}
				reader.Close();
				
				writer = new IndexWriter(dir, autoCommit, new StandardAnalyzer());
				writer.SetMaxBufferedDocs(2);
				writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
				writer.SetMergeScheduler(new SerialMergeScheduler());
				writer.SetMergePolicy(new LogDocMergePolicy(writer));
				
				Directory[] indexDirs = new Directory[]{new MockRAMDirectory(dir)};
				writer.AddIndexes(indexDirs);
				writer.Close();
			}
			dir.Close();
		}
Example #16
        static void Do()
        {
            //var directory = new SimpleFSDirectory(new DirectoryInfo(@"c:\temp\lucene"));
            using (var connection = new SqlConnection(@"MultipleActiveResultSets=True;Data Source=(localdb)\v11.0;Initial Catalog=TestLucene;Integrated Security=True;Connect Timeout=30;Encrypt=False;TrustServerCertificate=False"))
            {
                connection.Open();
                var directory = new SqlServerDirectory(connection, new Options() { SchemaName = "[search]" });

                for (int outer = 0; outer < 1000; outer++)
                {

                    IndexWriter indexWriter = null;
                    while (indexWriter == null)
                    {
                        try
                        {
                            indexWriter = new IndexWriter(directory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30),
                                !IndexReader.IndexExists(directory),
                                new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
                        }
                        catch (LockObtainFailedException)
                        {
                            Console.WriteLine("Lock is taken, waiting for timeout...");
                            Thread.Sleep(1000);
                        }
                    }
                    Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
                    indexWriter.SetRAMBufferSizeMB(100.0);
                    indexWriter.SetInfoStream(new StreamWriter(Console.OpenStandardOutput()));
                    indexWriter.UseCompoundFile = false;

                    for (int iDoc = 0; iDoc < 1000; iDoc++)
                    {
                        if (iDoc % 10 == 0)
                            Console.WriteLine(iDoc);
                        Document doc = new Document();
                        doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES,
                            Field.Index.ANALYZED, Field.TermVector.NO));
                        doc.Add(new Field("Title", "dog " + GeneratePhrase(50), Field.Store.NO, Field.Index.ANALYZED,
                            Field.TermVector.NO));
                        doc.Add(new Field("Body", "dog " + GeneratePhrase(50), Field.Store.NO, Field.Index.ANALYZED,
                            Field.TermVector.NO));
                        indexWriter.AddDocument(doc);
                    }

                    Console.WriteLine("Total docs is {0}", indexWriter.NumDocs());

                    Console.Write("Flushing and disposing writer...");
                    indexWriter.Flush(true, true, true);
                    indexWriter.Dispose();
                }

                IndexSearcher searcher;

                using (new AutoStopWatch("Creating searcher"))
                {
                    searcher = new IndexSearcher(directory);
                }
                using (new AutoStopWatch("Count"))
                    Console.WriteLine("Number of docs: {0}", searcher.IndexReader.NumDocs());

                while (true)
                {
                    SearchForPhrase(searcher, "microsoft");
                    Thread.Sleep(1000);
                    //Console.WriteLine("Press a key to search again");
                    //Console.ReadKey();
                }
            }
        }
Example #17
        static void Main(string[] args)
        {
            // get settings from azure settings or app.config
            CloudStorageAccount.SetConfigurationSettingPublisher((configName, configSetter) =>
            {
                try
                {
                    configSetter(RoleEnvironment.GetConfigurationSettingValue(configName));
                }
                catch (Exception)
                {
                    // for a console app, reading from App.config
                    configSetter(System.Configuration.ConfigurationManager.AppSettings[configName]);
                }
            });

            // default AzureDirectory stores cache in local temp folder
            AzureDirectory azureDirectory = new AzureDirectory(CloudStorageAccount.FromConfigurationSetting("blobStorage"), "TestCatalog6");
            bool findexExists = IndexReader.IndexExists(azureDirectory);

            IndexWriter indexWriter = null;
            while (indexWriter == null)
            {
                try
                {
                    indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(), !IndexReader.IndexExists(azureDirectory));
                }
                catch (LockObtainFailedException)
                {
                    Console.WriteLine("Lock is taken, Hit 'Y' to clear the lock, or anything else to try again");
                    if (Console.ReadLine().ToLower().Trim() == "y" )
                        azureDirectory.ClearLock("write.lock");
                }
            };
            Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
            indexWriter.SetRAMBufferSizeMB(10.0);
            indexWriter.SetUseCompoundFile(false);
            indexWriter.SetMaxMergeDocs(10000);
            indexWriter.SetMergeFactor(100);

            for (int iDoc = 0; iDoc < 10000; iDoc++)
            {
                if (iDoc % 10 == 0)
                    Console.WriteLine(iDoc);
                Document doc = new Document();
                doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                indexWriter.AddDocument(doc);
            }
            Console.WriteLine("Total docs is {0}", indexWriter.DocCount());
            indexWriter.Close();

            IndexSearcher searcher;
            using (new AutoStopWatch("Creating searcher"))
            {
                searcher = new IndexSearcher(azureDirectory);
            }
            SearchForPhrase(searcher, "dog");
            SearchForPhrase(searcher, _random.Next(32768).ToString());
            SearchForPhrase(searcher, _random.Next(32768).ToString());
        }
Example #18
 public virtual IndexWriter GetIndexWriter( string indexName )
 {
     IndexWriter writer;
     if ( !indexWriters.TryGetValue( indexName, out writer ) )
     {
         var index = GetIndex( indexName );
         var analyzer = GetIndexingAnalyzer( indexName );
         writer = new IndexWriter( index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED );
         writer.SetRAMBufferSizeMB( configuration.MemoryBufferLimit );
         writer.SetWriteLockTimeout( configuration.WriterLockTimeout );
         writer.MaybeMerge();
         indexWriters.TryAdd( indexName, writer );
     }
     return writer;
 }
Example #19
		public virtual void  TestSmallRAMBuffer()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetRAMBufferSizeMB(0.000001);
			int lastNumFile = dir.List().Length;
			for (int j = 0; j < 9; j++)
			{
				Document doc = new Document();
				doc.Add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
				writer.AddDocument(doc);
				int numFile = dir.List().Length;
				// Verify that with a tiny RAM buffer we see new
				// segment after every doc
				Assert.IsTrue(numFile > lastNumFile);
				lastNumFile = numFile;
			}
			writer.Close();
			dir.Close();
		}
Example #20
		public virtual void  TestExactFileNames()
		{
			
			for (int pass = 0; pass < 2; pass++)
			{
				
				System.String outputDir = "lucene.backwardscompat0.index";
				RmDir(outputDir);
				
				try
				{
					Directory dir = FSDirectory.Open(new System.IO.FileInfo(FullDir(outputDir)));
					
					bool autoCommit = 0 == pass;
					
					IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
					writer.SetRAMBufferSizeMB(16.0);
					for (int i = 0; i < 35; i++)
					{
						AddDoc(writer, i);
					}
					Assert.AreEqual(35, writer.DocCount(), "wrong doc count");
					writer.Close();
					
					// Delete one doc so we get a .del file:
					IndexReader reader = IndexReader.Open(dir);
					Term searchTerm = new Term("id", "7");
					int delCount = reader.DeleteDocuments(searchTerm);
					Assert.AreEqual(1, delCount, "didn't delete the right number of documents");
					
					// Set one norm so we get a .s0 file:
					reader.SetNorm(21, "content", (float) 1.5);
					reader.Close();
					
					// The numbering of fields can vary depending on which
					// JRE is in use.  On some JREs we see content bound to
					// field 0; on others, field 1.  So, here we have to
					// figure out which field number corresponds to
					// "content", and then set our expected file names below
					// accordingly:
					CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
					FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
					int contentFieldIndex = - 1;
					for (int i = 0; i < fieldInfos.Size(); i++)
					{
						FieldInfo fi = fieldInfos.FieldInfo(i);
						if (fi.name_ForNUnit.Equals("content"))
						{
							contentFieldIndex = i;
							break;
						}
					}
					cfsReader.Close();
					Assert.IsTrue(contentFieldIndex != - 1, "could not locate the 'content' field number in the _2.cfs segment");
					
					// Now verify file names:
					System.String[] expected;
					expected = new System.String[]{"_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen"};
					
					System.String[] actual = dir.ListAll();
					System.Array.Sort(expected);
					System.Array.Sort(actual);
					if (!SupportClass.CollectionsHelper.Equals(expected, actual))
					{
						Assert.Fail("incorrect filenames in index: expected:\n    " + AsString(expected) + "\n  actual:\n    " + AsString(actual));
					}
					dir.Close();
				}
				finally
				{
					RmDir(outputDir);
				}
			}
		}
Example #21
		public virtual void  TestChangingRAMBuffer2()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			writer.SetMaxBufferedDeleteTerms(10);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			
			for (int j = 1; j < 52; j++)
			{
				Document doc = new Document();
				doc.Add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
				writer.AddDocument(doc);
			}
			
			long lastGen = - 1;
			for (int j = 1; j < 52; j++)
			{
				writer.DeleteDocuments(new Term("field", "aaa" + j));
				_TestUtil.SyncConcurrentMerges(writer);
				long gen = SegmentInfos.GenerationFromSegmentsFileName(SegmentInfos.GetCurrentSegmentFileName(dir.List()));
				if (j == 1)
					lastGen = gen;
				else if (j < 10)
				{
					// No new files should be created
					Assert.AreEqual(gen, lastGen);
				}
				else if (10 == j)
				{
					Assert.IsTrue(gen > lastGen);
					lastGen = gen;
					writer.SetRAMBufferSizeMB(0.000001);
					writer.SetMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
				}
				else if (j < 20)
				{
					Assert.IsTrue(gen > lastGen);
					lastGen = gen;
				}
				else if (20 == j)
				{
					writer.SetRAMBufferSizeMB(16);
					writer.SetMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
					lastGen = gen;
				}
				else if (j < 30)
				{
					Assert.AreEqual(gen, lastGen);
				}
				else if (30 == j)
				{
					writer.SetRAMBufferSizeMB(0.000001);
					writer.SetMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
				}
				else if (j < 40)
				{
					Assert.IsTrue(gen > lastGen);
					lastGen = gen;
				}
				else if (40 == j)
				{
					writer.SetMaxBufferedDeleteTerms(10);
					writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
					lastGen = gen;
				}
				else if (j < 50)
				{
					Assert.AreEqual(gen, lastGen);
					writer.SetMaxBufferedDeleteTerms(10);
					writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
				}
				else if (50 == j)
				{
					Assert.IsTrue(gen > lastGen);
				}
			}
			writer.Close();
			dir.Close();
		}
Example #22
		private static IndexWriter CreateIndexWriter(Directory directory)
		{
			var indexWriter = new IndexWriter(directory, new StopAnalyzer(Version.LUCENE_29), IndexWriter.MaxFieldLength.UNLIMITED);
			using (indexWriter.MergeScheduler){}
			indexWriter.SetMergeScheduler(new ErrorLoggingConcurrentMergeScheduler());

			// RavenDB already manages the memory for those, no need for Lucene to do this as well

			indexWriter.MergeFactor = 1024;
			indexWriter.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
			indexWriter.SetRAMBufferSizeMB(1024);
			return indexWriter;
		}
Example #23
		public virtual void  TestHighFreqTerm()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetRAMBufferSizeMB(0.01);
			writer.SetMaxFieldLength(100000000);
			// Massive doc that has 128 K a's
			System.Text.StringBuilder b = new System.Text.StringBuilder(1024 * 1024);
			for (int i = 0; i < 4096; i++)
			{
				b.Append(" a a a a a a a a");
				b.Append(" a a a a a a a a");
				b.Append(" a a a a a a a a");
				b.Append(" a a a a a a a a");
			}
			Document doc = new Document();
			doc.Add(new Field("field", b.ToString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			writer.AddDocument(doc);
			writer.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			Assert.AreEqual(1, reader.MaxDoc());
			Assert.AreEqual(1, reader.NumDocs());
			Term t = new Term("field", "a");
			Assert.AreEqual(1, reader.DocFreq(t));
			TermDocs td = reader.TermDocs(t);
			td.Next();
			Assert.AreEqual(128 * 1024, td.Freq());
			reader.Close();
			dir.Close();
		}
Example #24
        public virtual void TestExactFileNames()
        {
            System.String outputDir = "lucene.backwardscompat0.index";
            RmDir(outputDir);

            try
            {
                Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(outputDir)));

                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                                     IndexWriter.MaxFieldLength.UNLIMITED);
                writer.SetRAMBufferSizeMB(16.0);
                for (int i = 0; i < 35; i++)
                {
                    AddDoc(writer, i);
                }
                Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count");
                writer.Close();

                // Delete one doc so we get a .del file:
                IndexReader reader     = IndexReader.Open(dir, false);
                Term        searchTerm = new Term("id", "7");
                int         delCount   = reader.DeleteDocuments(searchTerm);
                Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

                // Set one norm so we get a .s0 file:
                reader.SetNorm(21, "content", (float)1.5);
                reader.Close();

                // The numbering of fields can vary depending on which
                // JRE is in use.  On some JREs we see content bound to
                // field 0; on others, field 1.  So, here we have to
                // figure out which field number corresponds to
                // "content", and then set our expected file names below
                // accordingly:
                CompoundFileReader cfsReader  = new CompoundFileReader(dir, "_0.cfs");
                FieldInfos         fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
                int contentFieldIndex         = -1;
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.name_ForNUnit.Equals("content"))
                    {
                        contentFieldIndex = i;
                        break;
                    }
                }
                cfsReader.Close();
                Assert.IsTrue(contentFieldIndex != -1,
                              "could not locate the 'content' field number in the _2.cfs segment");

                // Now verify file names:
                System.String[] expected;
                expected = new System.String[]
                { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" };

                System.String[] actual = dir.ListAll();
                System.Array.Sort(expected);
                System.Array.Sort(actual);
                if (!CollectionsHelper.Equals(expected, actual))
                {
                    Assert.Fail("incorrect filenames in index: expected:\n    " + AsString(expected) +
                                "\n  actual:\n    " + AsString(actual));
                }
                dir.Close();
            }
            finally
            {
                RmDir(outputDir);
            }
        }
Example #25
		public virtual void  TestTermVectorCorruption3()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer());
			writer.SetMaxBufferedDocs(2);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			writer.SetMergeScheduler(new SerialMergeScheduler());
			writer.SetMergePolicy(new LogDocMergePolicy());
			
			Document document = new Document();
			Field storedField = new Field("stored", "stored", Field.Store.YES, Field.Index.NO);
			document.Add(storedField);
			Field termVectorField = new Field("termVector", "termVector", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
			document.Add(termVectorField);
			for (int i = 0; i < 10; i++)
				writer.AddDocument(document);
			writer.Close();
			
			writer = new IndexWriter(dir, false, new StandardAnalyzer());
			writer.SetMaxBufferedDocs(2);
			writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
			writer.SetMergeScheduler(new SerialMergeScheduler());
			writer.SetMergePolicy(new LogDocMergePolicy());
			for (int i = 0; i < 6; i++)
				writer.AddDocument(document);
			
			writer.Optimize();
			writer.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			for (int i = 0; i < 10; i++)
			{
				reader.GetTermFreqVectors(i);
				reader.Document(i);
			}
			reader.Close();
			dir.Close();
		}
Example #26
        /// <summary>
        /// add index 
        /// </summary>
        /// <param name="sampleDatas"></param>
        public static void AddUpdateLuceneIndex(IEnumerable<SampleData> sampleDatas)
        {
            // init lucene
            using (var luceneDirectory = LuceneDirectory)
            {
                var analyzer = new StandardAnalyzer(Version);
                using (var writer = new IndexWriter(luceneDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    writer.SetRAMBufferSizeMB(10);
                    // add data to lucene search index (replaces older entries if any)
                    foreach (var sampleData in sampleDatas) AddToLuceneIndex(sampleData, writer);

                    writer.Commit();
                    // close handles
                    analyzer.Close();
                }
            }
        }
Example #27
 public void SetRAMBufferSizeMB()
 {
     Writer.SetRAMBufferSizeMB(64.0);
 }
Example #28
        public void processRequest(string queryName, NameValueCollection htKeys)
        {
            string retVal = string.Empty;
              XmlDocument xdoc = new XmlDocument();
              xdoc.LoadXml(new Utility().getConfigXML());
              XmlNode xNode = xdoc.SelectSingleNode(string.Format("//blobdata[@name='default']"));

              string azureAccount = xNode.Attributes["account"].Value;
              string azureEndpoint = xNode.Attributes["endpoint"].Value;
              string azureSharedKey = xNode.Attributes["accountSharedKey"].Value;
              string blobStorage = xNode.Attributes["endpoint"].Value;

              xNode = xdoc.SelectSingleNode(string.Format("//fragmentData/Setting[@name='HandlerFragments']"));
              string fragmentLocation = xNode.Attributes["value"].Value;
              try
              {
            AzureBlobStorage abs = new AzureBlobStorage(azureAccount, blobStorage, azureSharedKey, "SharedKey");
            azureResults ar = new azureResults();
            // Get the page name and replace the .q extension with .xml
            if (!queryName.ToLower().EndsWith(".xml"))
              queryName += ".xml";
            byte[] xmlFragment = abs.GetBlob(fragmentLocation, queryName, "", ref ar, "");
            if (!ar.Succeeded)
            {
              NotifyError(new Exception(ar.StatusCode.ToString()));
            }
            else
            {

              xdoc = new XmlDocument();
              System.Text.ASCIIEncoding enc = new System.Text.ASCIIEncoding();
              xdoc.LoadXml(enc.GetString(xmlFragment));
              // Schema reference: http://azure-architect.com/portals/16/MOOPData.xsd
              XmlNode xn = xdoc.SelectSingleNode("//storedProcedure[1]");
              string storedProcedureName = xn.Attributes["procedureName"].Value;
              string connectionStringName = xn.Attributes["connectionName"].Value;

              SqlCommand cmd = new SqlCommand(storedProcedureName, new SqlConnection(new Utility().ResolveDataConnection(connectionStringName)));
              cmd.CommandType = CommandType.StoredProcedure;
              XmlNodeList xnl = xdoc.SelectNodes("/MOOPData/luceneData/field");
              Field.Store[] fieldStore = new Field.Store[xnl.Count];
              Field.Index[] indexType = new Field.Index[xnl.Count];
              string[] luceneName = new string[xnl.Count];
              string[] dataName = new string[xnl.Count];
              bool[] isIncludedInOlioSearchFlag = new bool[xnl.Count];
              bool[] isKeyFieldFlag = new bool[xnl.Count];
              string olioSearchFieldName = string.Empty;
              string azureContainerName = string.Empty;
              olioSearchFieldName = xdoc.SelectSingleNode("//MOOPData/luceneData/olioSearchFieldName[1]").InnerText;
              azureContainerName = xdoc.SelectSingleNode("//MOOPData/luceneData/azureContainer[1]").InnerText;
              for (int i = 0; i < xnl.Count; i++)
              {
            XmlNode node = xnl[i];
            switch (node.Attributes["store"].Value.ToLower())
            {
              case "compress": fieldStore[i] = Field.Store.COMPRESS; break;
              case "no": fieldStore[i] = Field.Store.NO; break;
              case "yes": fieldStore[i] = Field.Store.YES; break;
              default: fieldStore[i] = Field.Store.NO; break;
            }

            switch (node.Attributes["index"].Value.ToLower())
            {
              case "analyzed": indexType[i] = Field.Index.ANALYZED; break;
              case "analyzed_no_norms": indexType[i] = Field.Index.ANALYZED_NO_NORMS; break;
              case "no": indexType[i] = Field.Index.NO; break;
              case "no_norms": indexType[i] = Field.Index.NOT_ANALYZED_NO_NORMS; break;
              case "not_analyzed": indexType[i] = Field.Index.NOT_ANALYZED; break;
              case "not_analyzed_no_norms": indexType[i] = Field.Index.NOT_ANALYZED_NO_NORMS; break;
              case "tokenized": indexType[i] = Field.Index.ANALYZED; break;
              case "un_tokenized": indexType[i] = Field.Index.NOT_ANALYZED; break;
              default: indexType[i] = Field.Index.NO; break;
            }
            dataName[i] = node.Attributes["dataName"].Value;
            luceneName[i] = node.Attributes["luceneName"].Value;
            isKeyFieldFlag[i] = node.Attributes["isKeyField"].Value == "true";
            isIncludedInOlioSearchFlag[i] = node.Attributes["isIncludedInOlioSearch"].Value == "true";
              }

              xnl = xdoc.SelectNodes("//parameter");
              foreach (XmlNode node in xnl)
              {
            string parameterName = node.Attributes["parameterName"].Value;
            string urlParameterName = node.Attributes["urlParameterName"].Value;
            string dataType = node.Attributes["dataType"].Value;
            string dataLength = node.Attributes["dataLength"].Value;
            string defaultValue = node.Attributes["defaultValue"].Value;
            if (!parameterName.StartsWith("@"))
              parameterName = "@" + parameterName;
            SqlParameter sp = new SqlParameter();
            sp.ParameterName = parameterName;
            switch (dataType)
            {
              case "bigint": sp.SqlDbType = SqlDbType.BigInt; break;
              case "binary": sp.SqlDbType = SqlDbType.Binary; break;
              case "bit": sp.SqlDbType = SqlDbType.Bit; break;
              case "char": sp.SqlDbType = SqlDbType.Char; break;
              case "date": sp.SqlDbType = SqlDbType.Date; break;
              case "datetime": sp.SqlDbType = SqlDbType.DateTime; break;
              case "datetime2": sp.SqlDbType = SqlDbType.DateTime2; break;
              case "datetimeoffset": sp.SqlDbType = SqlDbType.DateTimeOffset; break;
              case "decimal": sp.SqlDbType = SqlDbType.Decimal; break;
              case "float": sp.SqlDbType = SqlDbType.Float; break;
              case "geography": sp.SqlDbType = SqlDbType.Structured; break;
              case "geometry": sp.SqlDbType = SqlDbType.Structured; break;
              case "hierarchyid": sp.SqlDbType = SqlDbType.Structured; break;
              case "image": sp.SqlDbType = SqlDbType.Image; break;
              case "int": sp.SqlDbType = SqlDbType.Int; break;
              case "money": sp.SqlDbType = SqlDbType.Money; break;
              case "nchar": sp.SqlDbType = SqlDbType.NChar; break;
              case "ntext": sp.SqlDbType = SqlDbType.NText; break;
              case "nvarchar": sp.SqlDbType = SqlDbType.NVarChar; break;
              case "real": sp.SqlDbType = SqlDbType.Real; break;
              case "smalldatetime": sp.SqlDbType = SqlDbType.SmallDateTime; break;
              case "smallint": sp.SqlDbType = SqlDbType.SmallInt; break;
              case "smallmoney": sp.SqlDbType = SqlDbType.SmallMoney; break;
              case "sql_variant": sp.SqlDbType = SqlDbType.Variant; break;
              case "text": sp.SqlDbType = SqlDbType.Text; break;
              case "time": sp.SqlDbType = SqlDbType.Time; break;
              case "timestamp": sp.SqlDbType = SqlDbType.Timestamp; break;
              case "tinyint": sp.SqlDbType = SqlDbType.TinyInt; break;
              case "uniqueidentifier": sp.SqlDbType = SqlDbType.UniqueIdentifier; break;
              case "varbinary": sp.SqlDbType = SqlDbType.VarBinary; break;
              case "varchar": sp.SqlDbType = SqlDbType.VarChar; break;
              case "xml": sp.SqlDbType = SqlDbType.Xml; break;
              default: sp.SqlDbType = SqlDbType.Variant; break;
            }
            switch (urlParameterName.ToLower())
            {
              case "ipaddress": sp.Value = "127.0.0.1"; break;
              case "domainname": sp.Value = ""; break;
              default: if (htKeys[urlParameterName] != null)
                  sp.Value = htKeys[urlParameterName];
                else
                  sp.Value = (defaultValue.ToLower() == "dbnull" ? DBNull.Value
                  : (object)defaultValue);
                break;
            }
            cmd.Parameters.Add(sp);
              }

              cmd.Connection.Open();
              SqlDataReader dr = cmd.ExecuteReader();
              Microsoft.WindowsAzure.StorageCredentialsAccountAndKey scaak = new Microsoft.WindowsAzure.StorageCredentialsAccountAndKey(azureAccount, azureSharedKey);
              Microsoft.WindowsAzure.CloudStorageAccount csa = new Microsoft.WindowsAzure.CloudStorageAccount(scaak, false);
              AzureDirectory azureDirectory = new AzureDirectory(csa, azureContainerName, new RAMDirectory());
              bool findexExists = false;
              try
              {
            findexExists = IndexReader.IndexExists(azureDirectory);
            if ((findexExists) && IndexWriter.IsLocked(azureDirectory))
              azureDirectory.ClearLock("write.lock");
              }
              catch (Exception e)
              {
            Trace.WriteLine(e.ToString());
            return;
              }

              IndexWriter idxW = new IndexWriter(azureDirectory, new SnowballAnalyzer("English"), !findexExists, new IndexWriter.MaxFieldLength(1024));
              idxW.SetRAMBufferSizeMB(10.0);
              idxW.SetUseCompoundFile(false);
              idxW.SetMaxMergeDocs(10000);
              idxW.SetMergeFactor(100);
              while (dr.Read())
              {
            StringBuilder olioSearch = new StringBuilder();
            Document doc = new Document();
            for (int i = 0; i <= dataName.GetUpperBound(0); i++)
            {

              if (isKeyFieldFlag[i])
              {

                NotifyCaller(string.Format("Processing {0}", dr[dataName[i]].ToString().ToLower()));
                idxW.DeleteDocuments(new Term(luceneName[i], dr[dataName[i]].ToString().ToLower()));
                doc.Add(new Field(luceneName[i], dr[dataName[i]].ToString().ToLower(), Field.Store.YES, Field.Index.NOT_ANALYZED));
              }
              else
                try
                {
                  doc.Add(new Field(luceneName[i], dr[dataName[i]].ToString(), fieldStore[i], indexType[i]));

                  if (isIncludedInOlioSearchFlag[i])
                    olioSearch.AppendFormat("\r\n{0}", dr[dataName[i]].ToString());
                }
                catch (Exception ex)
                {
                  NotifyError(ex);
                }
            }
            if (olioSearch.ToString() != string.Empty && olioSearchFieldName != string.Empty)
              doc.Add(new Field(olioSearchFieldName, olioSearch.ToString(), Field.Store.NO, Field.Index.ANALYZED));
            idxW.AddDocument(doc);
              }
              idxW.Commit();
              idxW.Close();

            }
              }

              catch (Exception ex)
              {
            MOOPFramework.FrameworkUtility u = new MOOPFramework.FrameworkUtility(new Utility().ResolveDataConnection("sqlAzureConnection"));
            u.LogData("localhost", "quoteSearchLoader", "testing", string.Empty, string.Empty, "", "QueryError", ex.ToString(),
              u.nvc2XML(htKeys));
            //retVal = string.Format("<!-- {0} -->", ex.ToString());
            NotifyError(new Exception("An error occured but it was logged for later review"));
              }
              finally { if (retVal == string.Empty) retVal = "<root />"; }
        }
Example #29
		private void CreateIndexWriter()
		{
			snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
			indexWriter = new IndexWriter(directory, stopAnalyzer, snapshotter, IndexWriter.MaxFieldLength.UNLIMITED);
			using (indexWriter.MergeScheduler) { }
			indexWriter.SetMergeScheduler(new ErrorLoggingConcurrentMergeScheduler());

			// RavenDB already manages the memory for those, no need for Lucene to do this as well
			indexWriter.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
			indexWriter.SetRAMBufferSizeMB(1024);
		}
Example #30
 public static bool Index(Analyzer analyzer, string savepath,string dir, int maxFieldLength, double ramBufferSize, int mergeFactor, int maxBufferedDocs,bool create)
 {
     try
     {
         IndexWriter writer = new IndexWriter(savepath, analyzer, create);
         writer.SetMaxFieldLength(maxFieldLength);
         writer.SetRAMBufferSizeMB(ramBufferSize);
         writer.SetMergeFactor(mergeFactor);
         writer.SetMaxBufferedDocs(maxBufferedDocs);
         IndexDir(writer, dir);
         return true;
     }
     catch (Exception )
     {
         return false;
     }
 }
Example #31
        // Saten
        public String CreateMyIndexDoc()
        {
            String status = "";
            try
            {

                if (dbConnection.State.ToString() == "Closed")
                {
                    dbConnection.Open();
                }

                String storageConnectionString =
                     "DefaultEndpointsProtocol=https;"
                   + "AccountName=classifiedfilestorage;"
                   + "AccountKey=P/GSa/P8vmBicBT45Jgv7pmRHdYWYfJeTFJ963Q1aEHlJDZBCzYkTBcjH1JoGgl+k34x9koBW4lsgYhCym0JLQ==";

                CloudStorageAccount cloudAccount = CloudStorageAccount.Parse(storageConnectionString);

                var cacheDirectory = new RAMDirectory();
                AzureDirectory azureDirectory = new AzureDirectory(cloudAccount, "JobCat",cacheDirectory);
                bool findexExists = IndexReader.IndexExists(azureDirectory);

                IndexWriter indexWriter = null;
                while (indexWriter == null)
                {
                    try
                    {
                        indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30),findexExists, new Lucene.Net.Index.IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
                    }
                    catch (LockObtainFailedException)
                    {
                        Thread.Sleep(1000);
                    }
                };

                indexWriter.SetRAMBufferSizeMB(10.0);

                String query = "SELECT jobID,title,jobCategory from jobs";

                SqlCommand command = new SqlCommand(query, dbConnection);

                SqlDataReader reader = command.ExecuteReader();

                if (reader.HasRows) {

                    while (reader.Read())
                    {
                        Document docx = new Document();
                        docx.Add(new Field("jobID", reader["jobID"].ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        docx.Add(new Field("title", reader["title"].ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        docx.Add(new Field("jobCategory", reader["jobCategory"].ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
                        indexWriter.AddDocument(docx);
                    }
                }

                indexWriter.Close();

                dbConnection.Close();

                status = "Success";
            }
            catch (Exception e) {
                status = "" + e;
            }

            return status;
        }
Example #32
 public static bool Index(Analyzer analyzer, FileIndexSet set, int maxFieldLength, double ramBufferSize, int mergeFactor, int maxBufferedDocs, bool create)
 {
     try
     {
         IndexWriter writer = new IndexWriter(set.Path, analyzer, create);
         writer.SetMaxFieldLength(maxFieldLength);
         writer.SetRAMBufferSizeMB(ramBufferSize);
         writer.SetMergeFactor(mergeFactor);
         writer.SetMaxBufferedDocs(maxBufferedDocs);
         foreach (string dir in set.BaseDirs)
         {
             IndexDir(writer, dir);
         }
         return true;
     }
     catch (Exception )
     {
         return false;
     }
 }