/// <summary>
/// Copies every configured (non-null) tuning option of this instance onto <paramref name="writer"/>.
/// </summary>
/// <param name="writer">The index writer to configure.</param>
/// <remarks>
/// Options are applied in a fixed order: merge factor, max merge docs, max buffered docs,
/// RAM buffer size and term index interval. If one of the setters throws an
/// <see cref="ArgumentOutOfRangeException"/>, the failure is written to the trace listeners and the
/// options that come after it are not applied; the exception is not propagated to the caller.
/// </remarks>
public void ApplyToWriter(IndexWriter writer)
{
    try
    {
        if (MergeFactor != null)
        {
            writer.SetMergeFactor((int) MergeFactor);
        }

        if (MaxMergeDocs != null)
        {
            writer.SetMaxMergeDocs((int) MaxMergeDocs);
        }

        if (MaxBufferedDocs != null)
        {
            writer.SetMaxBufferedDocs((int) MaxBufferedDocs);
        }

        if (RamBufferSizeMb != null)
        {
            writer.SetRAMBufferSizeMB((int) RamBufferSizeMb);
        }

        if (TermIndexInterval != null)
        {
            writer.SetTermIndexInterval((int) TermIndexInterval);
        }
    }
    catch (ArgumentOutOfRangeException ex)
    {
        // Best effort: a rejected value must not break the caller, but it should not vanish silently either.
        System.Diagnostics.Trace.TraceWarning("Index writer option rejected as out of range: " + ex.Message);
    }
}
/// <summary>
/// Opens an <see cref="IndexWriter"/> over the indexer's Lucene directory (opened with
/// <c>create == false</c> and an unlimited field length) and applies a fixed set of tuning values.
/// </summary>
/// <param name="indexer">The indexer whose directory and indexing analyzer are used.</param>
/// <returns>The configured writer; the caller is responsible for closing it.</returns>
public static IndexWriter GetAzureIndexWriter(this LuceneIndexer indexer)
{
    // Called before the writer is opened with create == false.
    indexer.EnsureIndex(false);

    var luceneDirectory = indexer.GetLuceneDirectory();
    var analyzer = indexer.IndexingAnalyzer;
    var azureWriter = new IndexWriter(
        luceneDirectory,
        analyzer,
        false,
        IndexWriter.MaxFieldLength.UNLIMITED);

    // Fixed tuning: 10 MB RAM buffer, no compound files, merges capped at 10000 docs, merge factor 100.
    azureWriter.SetRAMBufferSizeMB(10.0);
    azureWriter.SetUseCompoundFile(false);
    azureWriter.SetMaxMergeDocs(10000);
    azureWriter.SetMergeFactor(100);

    return azureWriter;
}
/// <summary>
/// Retrieve a read/write <see cref="IndexWriter" /> for a directory provider, closing any
/// <see cref="IndexReader" /> this workspace still holds for that provider first.
/// </summary>
/// <param name="provider">The directory provider whose index is to be written.</param>
/// <param name="entity">
/// The entity type whose document builder supplies the analyzer; when <c>null</c> a
/// <see cref="SimpleAnalyzer" /> is used instead.
/// </param>
/// <param name="modificationOperation">
/// When <c>true</c>, the operation counter kept for the provider is incremented.
/// </param>
/// <returns>
/// The cached or newly opened writer, or <c>null</c> when opening the writer failed with an
/// <see cref="IOException" /> that was handed to <c>CleanUp</c>.
/// </returns>
public IndexWriter GetIndexWriter(IDirectoryProvider provider, System.Type entity, bool modificationOperation)
{
    // Have to close the reader before the writer is accessed.
    IndexReader openReader;
    readers.TryGetValue(provider, out openReader);
    if (openReader != null)
    {
        try
        {
            openReader.Close();
        }
        catch (IOException ex)
        {
            throw new SearchException("Exception while closing IndexReader", ex);
        }
        finally
        {
            readers.Remove(provider);

            // The lock is released here (not inside the try) so that it is also released when
            // closing the reader fails; it is re-acquired below through LockProvider.
            object syncLock = searchFactoryImplementor.GetLockableDirectoryProviders()[provider];
            Monitor.Exit(syncLock);
        }
    }

    // Reuse the writer that is already open for this provider, if any.
    IndexWriter cachedWriter;
    if (writers.TryGetValue(provider, out cachedWriter))
    {
        return cachedWriter;
    }

    LockProvider(provider);

    if (modificationOperation)
    {
        dpStatistics[provider].Operations++;
    }

    try
    {
        Analyzer analyzer;
        if (entity != null)
        {
            analyzer = searchFactoryImplementor.DocumentBuilders[entity].Analyzer;
        }
        else
        {
            analyzer = new SimpleAnalyzer();
        }

        IndexWriter newWriter = new IndexWriter(provider.Directory, analyzer, false);
        LuceneIndexingParameters tuning = searchFactoryImplementor.GetIndexingParameters(provider);

        // Batch and transactional work use separate sets of merge/buffer settings.
        if (IsBatch)
        {
            newWriter.SetMergeFactor(tuning.BatchMergeFactor);
            newWriter.SetMaxMergeDocs(tuning.BatchMaxMergeDocs);
            newWriter.SetMaxBufferedDocs(tuning.BatchMaxBufferedDocs);
        }
        else
        {
            newWriter.SetMergeFactor(tuning.TransactionMergeFactor);
            newWriter.SetMaxMergeDocs(tuning.TransactionMaxMergeDocs);
            newWriter.SetMaxBufferedDocs(tuning.TransactionMaxBufferedDocs);
        }

        writers.Add(provider, newWriter);
        return newWriter;
    }
    catch (IOException ex)
    {
        string target = entity != null ? " for " + entity : "";
        CleanUp(new SearchException("Unable to open IndexWriter" + target, ex));
    }

    return null;
}
/// <summary>
/// Adds 177 copies of a small term-vector document to a writer configured with
/// maxMergeDocs = 20, maxBufferedDocs = 2 and mergeFactor = 2, using <c>MyMergeScheduler</c>
/// as the merge scheduler. Any checks on the merges themselves live in that scheduler,
/// which is defined outside this method.
/// </summary>
public virtual void TestSetMaxMergeDocs()
{
    MockRAMDirectory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(), true);

    writer.SetMergeScheduler(new MyMergeScheduler(this));
    writer.SetMaxMergeDocs(20);
    writer.SetMaxBufferedDocs(2);
    writer.SetMergeFactor(2);

    Field termVectorField = new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES);
    Document doc = new Document();
    doc.Add(termVectorField);

    // The same document instance is added repeatedly; only the number of documents matters here.
    int added = 0;
    while (added < 177)
    {
        writer.AddDocument(doc);
        added++;
    }

    writer.Close();
}
/// <summary>
/// Console entry point: opens (or creates) the "TestCatalog" index in Azure blob storage,
/// runs the document-adding loop, prints the document count and then issues three sample searches.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // get settings from azure settings or app.config
    CloudStorageAccount.SetConfigurationSettingPublisher((configName, configSetter) =>
    {
        try
        {
            configSetter(RoleEnvironment.GetConfigurationSettingValue(configName));
        }
        catch (Exception)
        {
            // for a console app, reading from App.config
            configSetter(System.Configuration.ConfigurationManager.AppSettings[configName]);
        }
    });

    // default AzureDirectory stores cache in local temp folder
    CloudStorageAccount storageAccount = CloudStorageAccount.FromConfigurationSetting("blobStorage");
    AzureDirectory azureDirectory = new AzureDirectory(storageAccount, "TestCatalog");

    bool indexAlreadyExists = false;
    try
    {
        indexAlreadyExists = IndexReader.IndexExists(azureDirectory);

        // A lock found on an existing index is cleared before the writer is opened.
        if (indexAlreadyExists && IndexReader.IsLocked(azureDirectory))
        {
            azureDirectory.ClearLock("write.lock");
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
        return;
    }

    // Create the index when none exists yet, otherwise append to it.
    IndexWriter indexWriter = new IndexWriter(
        azureDirectory,
        new Lucene.Net.Analysis.Standard.StandardAnalyzer(),
        !indexAlreadyExists);
    indexWriter.SetRAMBufferSizeMB(10.0);
    indexWriter.SetUseCompoundFile(false);
    indexWriter.SetMaxMergeDocs(10000);
    indexWriter.SetMergeFactor(100);

    // NOTE(review): fExit is set to true right before a loop that stops as soon as fExit is true,
    // so as written no document is added here unless something else resets the flag. Confirm
    // whether this was meant to be false.
    fExit = true;
    for (int docNumber = 0; docNumber < 100 && !fExit; docNumber++)
    {
        if (docNumber % 10 == 0)
        {
            Console.WriteLine(docNumber);
        }

        Document doc = new Document();
        doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        indexWriter.AddDocument(doc);
    }

    Console.WriteLine("Total docs is {0}", indexWriter.DocCount());
    indexWriter.Close();

    IndexSearcher searcher;
    using (new AutoStopWatch("Creating searcher"))
    {
        searcher = new IndexSearcher(azureDirectory);
    }

    SearchForPhrase(searcher, "dog");
    SearchForPhrase(searcher, _random.Next(32768).ToString());
    SearchForPhrase(searcher, _random.Next(32768).ToString());
}
/// <summary>
/// Console entry point: acquires the write lock on the "TestCatalog6" index in Azure blob storage
/// (asking the user whether to clear a lock that is already taken), adds 10000 generated
/// documents, prints the document count and then issues three sample searches.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // get settings from azure settings or app.config
    CloudStorageAccount.SetConfigurationSettingPublisher((configName, configSetter) =>
    {
        try
        {
            configSetter(RoleEnvironment.GetConfigurationSettingValue(configName));
        }
        catch (Exception)
        {
            // for a console app, reading from App.config
            configSetter(System.Configuration.ConfigurationManager.AppSettings[configName]);
        }
    });

    // default AzureDirectory stores cache in local temp folder
    CloudStorageAccount storageAccount = CloudStorageAccount.FromConfigurationSetting("blobStorage");
    AzureDirectory azureDirectory = new AzureDirectory(storageAccount, "TestCatalog6");

    // The result of this probe is not used below; the call is kept so the directory is
    // touched exactly as before.
    IndexReader.IndexExists(azureDirectory);

    // Keep trying until the writer (and with it the write lock) has been obtained.
    IndexWriter indexWriter = null;
    do
    {
        try
        {
            bool createIndex = !IndexReader.IndexExists(azureDirectory);
            indexWriter = new IndexWriter(azureDirectory, new StandardAnalyzer(), createIndex);
        }
        catch (LockObtainFailedException)
        {
            Console.WriteLine("Lock is taken, Hit 'Y' to clear the lock, or anything else to try again");
            string answer = Console.ReadLine().ToLower().Trim();
            if (answer == "y")
            {
                azureDirectory.ClearLock("write.lock");
            }
        }
    }
    while (indexWriter == null);

    Console.WriteLine("IndexWriter lock obtained, this process has exclusive write access to index");
    indexWriter.SetRAMBufferSizeMB(10.0);
    indexWriter.SetUseCompoundFile(false);
    indexWriter.SetMaxMergeDocs(10000);
    indexWriter.SetMergeFactor(100);

    int docNumber = 0;
    while (docNumber < 10000)
    {
        // Progress output every tenth document.
        if (docNumber % 10 == 0)
        {
            Console.WriteLine(docNumber);
        }

        Document doc = new Document();
        doc.Add(new Field("id", DateTime.Now.ToFileTimeUtc().ToString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        doc.Add(new Field("Title", GeneratePhrase(10), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        doc.Add(new Field("Body", GeneratePhrase(40), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        indexWriter.AddDocument(doc);

        docNumber++;
    }

    Console.WriteLine("Total docs is {0}", indexWriter.DocCount());
    indexWriter.Close();

    IndexSearcher searcher;
    using (new AutoStopWatch("Creating searcher"))
    {
        searcher = new IndexSearcher(azureDirectory);
    }

    SearchForPhrase(searcher, "dog");
    SearchForPhrase(searcher, _random.Next(32768).ToString());
    SearchForPhrase(searcher, _random.Next(32768).ToString());
}
/// <summary>
/// Optimizes the Lucene index stored in the given Azure blob container.
/// </summary>
/// <param name="azureContainerName">Name of the blob container that holds the index.</param>
/// <remarks>
/// The storage account name and shared key are read from the <c>blobdata[@name='default']</c>
/// node of the configuration XML. A write lock found on an existing index is cleared before the
/// writer is opened. If probing the directory fails, the error is traced and the method returns
/// without optimizing. The writer is always closed, so the write lock is released even when
/// optimizing throws.
/// </remarks>
public void optimizeIndex(string azureContainerName)
{
    XmlDocument xdoc = new XmlDocument();
    xdoc.LoadXml(new Utility().getConfigXML());

    // Only the account name and shared key are needed to reach the container.
    XmlNode xNode = xdoc.SelectSingleNode("//blobdata[@name='default']");
    string azureAccount = xNode.Attributes["account"].Value;
    string azureSharedKey = xNode.Attributes["accountSharedKey"].Value;

    Microsoft.WindowsAzure.StorageCredentialsAccountAndKey scaak = new Microsoft.WindowsAzure.StorageCredentialsAccountAndKey(azureAccount, azureSharedKey);
    Microsoft.WindowsAzure.CloudStorageAccount csa = new Microsoft.WindowsAzure.CloudStorageAccount(scaak, false);
    AzureDirectory azureDirectory = new AzureDirectory(csa, azureContainerName, new RAMDirectory());

    bool findexExists = false;
    try
    {
        findexExists = IndexReader.IndexExists(azureDirectory);
        if (findexExists && IndexWriter.IsLocked(azureDirectory))
        {
            azureDirectory.ClearLock("write.lock");
        }
    }
    catch (Exception e)
    {
        Trace.WriteLine(e.ToString());
        return;
    }

    // Creates the index when none exists yet, otherwise opens the existing one.
    IndexWriter idxW = new IndexWriter(azureDirectory, new SnowballAnalyzer("English"), !findexExists, new IndexWriter.MaxFieldLength(1024));
    try
    {
        idxW.SetRAMBufferSizeMB(10.0);
        idxW.SetUseCompoundFile(false);
        idxW.SetMaxMergeDocs(10000);
        idxW.SetMergeFactor(100);
        idxW.Optimize();
    }
    finally
    {
        // Without this the writer, and the write lock it holds, would stay open after the call.
        idxW.Close();
    }
}
/// <summary>
/// Runs the stored procedure described by a query-definition XML fragment and indexes every
/// returned row into the Lucene index kept in Azure blob storage.
/// </summary>
/// <param name="queryName">
/// Name of the query-definition blob; ".xml" is appended when the name does not already end with it.
/// </param>
/// <param name="htKeys">
/// Values for the stored-procedure parameters, looked up by each parameter's
/// <c>urlParameterName</c>; parameters without a value fall back to their configured default.
/// </param>
/// <remarks>
/// Failures are not propagated: a failed blob download is reported through <c>NotifyError</c>,
/// a failure while probing or unlocking the index is traced and ends the method, and any other
/// exception is logged through <c>FrameworkUtility.LogData</c> and reported with a generic message.
/// The SQL connection, command and reader are disposed and the index writer is closed on every
/// exit path.
/// </remarks>
public void processRequest(string queryName, NameValueCollection htKeys)
{
    XmlDocument xdoc = new XmlDocument();
    xdoc.LoadXml(new Utility().getConfigXML());
    XmlNode xNode = xdoc.SelectSingleNode(string.Format("//blobdata[@name='default']"));
    string azureAccount = xNode.Attributes["account"].Value;
    string azureSharedKey = xNode.Attributes["accountSharedKey"].Value;
    string blobStorage = xNode.Attributes["endpoint"].Value;
    xNode = xdoc.SelectSingleNode(string.Format("//fragmentData/Setting[@name='HandlerFragments']"));
    string fragmentLocation = xNode.Attributes["value"].Value;

    try
    {
        AzureBlobStorage abs = new AzureBlobStorage(azureAccount, blobStorage, azureSharedKey, "SharedKey");
        azureResults ar = new azureResults();

        // Query definitions are stored as .xml blobs; add the extension when the caller left it off.
        if (!queryName.ToLower().EndsWith(".xml"))
        {
            queryName += ".xml";
        }

        byte[] xmlFragment = abs.GetBlob(fragmentLocation, queryName, "", ref ar, "");
        if (!ar.Succeeded)
        {
            NotifyError(new Exception(ar.StatusCode.ToString()));
            return;
        }

        // NOTE(review): the fragment is decoded as ASCII, so non-ASCII characters in it are lost.
        xdoc = new XmlDocument();
        System.Text.ASCIIEncoding enc = new System.Text.ASCIIEncoding();
        xdoc.LoadXml(enc.GetString(xmlFragment));

        /*
         * http://azure-architect.com/portals/16/MOOPData.xsd
         */
        XmlNode xn = xdoc.SelectSingleNode("//storedProcedure[1]");
        string storedProcedureName = xn.Attributes["procedureName"].Value;
        string connectionStringName = xn.Attributes["connectionName"].Value;

        using (SqlConnection connection = new SqlConnection(new Utility().ResolveDataConnection(connectionStringName)))
        using (SqlCommand cmd = new SqlCommand(storedProcedureName, connection))
        {
            cmd.CommandType = CommandType.StoredProcedure;

            // Per-field Lucene settings, kept in parallel arrays indexed by field position.
            XmlNodeList xnl = xdoc.SelectNodes("/MOOPData/luceneData/field");
            Field.Store[] fieldStore = new Field.Store[xnl.Count];
            Field.Index[] indexType = new Field.Index[xnl.Count];
            string[] luceneName = new string[xnl.Count];
            string[] dataName = new string[xnl.Count];
            bool[] isIncludedInOlioSearchFlag = new bool[xnl.Count];
            bool[] isKeyFieldFlag = new bool[xnl.Count];
            string olioSearchFieldName = xdoc.SelectSingleNode("//MOOPData/luceneData/olioSearchFieldName[1]").InnerText;
            string azureContainerName = xdoc.SelectSingleNode("//MOOPData/luceneData/azureContainer[1]").InnerText;

            for (int i = 0; i < xnl.Count; i++)
            {
                XmlNode node = xnl[i];

                switch (node.Attributes["store"].Value.ToLower())
                {
                    case "compress": fieldStore[i] = Field.Store.COMPRESS; break;
                    case "no": fieldStore[i] = Field.Store.NO; break;
                    case "yes": fieldStore[i] = Field.Store.YES; break;
                    default: fieldStore[i] = Field.Store.NO; break;
                }

                switch (node.Attributes["index"].Value.ToLower())
                {
                    case "analyzed": indexType[i] = Field.Index.ANALYZED; break;
                    case "analyzed_no_norms": indexType[i] = Field.Index.ANALYZED_NO_NORMS; break;
                    case "no": indexType[i] = Field.Index.NO; break;
                    case "no_norms": indexType[i] = Field.Index.NOT_ANALYZED_NO_NORMS; break;
                    case "not_analyzed": indexType[i] = Field.Index.NOT_ANALYZED; break;
                    case "not_analyzed_no_norms": indexType[i] = Field.Index.NOT_ANALYZED_NO_NORMS; break;
                    case "tokenized": indexType[i] = Field.Index.ANALYZED; break;
                    case "un_tokenized": indexType[i] = Field.Index.NOT_ANALYZED; break;
                    default: indexType[i] = Field.Index.NO; break;
                }

                dataName[i] = node.Attributes["dataName"].Value;
                luceneName[i] = node.Attributes["luceneName"].Value;
                isKeyFieldFlag[i] = node.Attributes["isKeyField"].Value == "true";
                isIncludedInOlioSearchFlag[i] = node.Attributes["isIncludedInOlioSearch"].Value == "true";
            }

            // Build the stored-procedure parameters from the <parameter> definitions.
            xnl = xdoc.SelectNodes("//parameter");
            foreach (XmlNode node in xnl)
            {
                string parameterName = node.Attributes["parameterName"].Value;
                string urlParameterName = node.Attributes["urlParameterName"].Value;
                string dataType = node.Attributes["dataType"].Value;
                string defaultValue = node.Attributes["defaultValue"].Value;

                if (!parameterName.StartsWith("@"))
                {
                    parameterName = "@" + parameterName;
                }

                SqlParameter sp = new SqlParameter();
                sp.ParameterName = parameterName;

                switch (dataType)
                {
                    case "bigint": sp.SqlDbType = SqlDbType.BigInt; break;
                    case "binary": sp.SqlDbType = SqlDbType.Binary; break;
                    case "bit": sp.SqlDbType = SqlDbType.Bit; break;
                    case "char": sp.SqlDbType = SqlDbType.Char; break;
                    case "date": sp.SqlDbType = SqlDbType.Date; break;
                    case "datetime": sp.SqlDbType = SqlDbType.DateTime; break;
                    case "datetime2": sp.SqlDbType = SqlDbType.DateTime2; break;
                    case "datetimeoffset": sp.SqlDbType = SqlDbType.DateTimeOffset; break;
                    case "decimal": sp.SqlDbType = SqlDbType.Decimal; break;
                    case "float": sp.SqlDbType = SqlDbType.Float; break;
                    case "geography": sp.SqlDbType = SqlDbType.Structured; break;
                    case "geometry": sp.SqlDbType = SqlDbType.Structured; break;
                    case "hierarchyid": sp.SqlDbType = SqlDbType.Structured; break;
                    case "image": sp.SqlDbType = SqlDbType.Image; break;
                    case "int": sp.SqlDbType = SqlDbType.Int; break;
                    case "money": sp.SqlDbType = SqlDbType.Money; break;
                    case "nchar": sp.SqlDbType = SqlDbType.NChar; break;
                    case "ntext": sp.SqlDbType = SqlDbType.NText; break;
                    case "nvarchar": sp.SqlDbType = SqlDbType.NVarChar; break;
                    case "real": sp.SqlDbType = SqlDbType.Real; break;
                    case "smalldatetime": sp.SqlDbType = SqlDbType.SmallDateTime; break;
                    case "smallint": sp.SqlDbType = SqlDbType.SmallInt; break;
                    case "smallmoney": sp.SqlDbType = SqlDbType.SmallMoney; break;
                    case "sql_variant": sp.SqlDbType = SqlDbType.Variant; break;
                    case "text": sp.SqlDbType = SqlDbType.Text; break;
                    case "time": sp.SqlDbType = SqlDbType.Time; break;
                    case "timestamp": sp.SqlDbType = SqlDbType.Timestamp; break;
                    case "tinyint": sp.SqlDbType = SqlDbType.TinyInt; break;
                    case "uniqueidentifier": sp.SqlDbType = SqlDbType.UniqueIdentifier; break;
                    case "varbinary": sp.SqlDbType = SqlDbType.VarBinary; break;
                    case "varchar": sp.SqlDbType = SqlDbType.VarChar; break;
                    case "xml": sp.SqlDbType = SqlDbType.Xml; break;
                    default: sp.SqlDbType = SqlDbType.Variant; break;
                }

                switch (urlParameterName.ToLower())
                {
                    // These two parameters get fixed values instead of caller-supplied ones.
                    case "ipaddress":
                        sp.Value = "127.0.0.1";
                        break;
                    case "domainname":
                        sp.Value = "";
                        break;
                    default:
                        if (htKeys[urlParameterName] != null)
                        {
                            sp.Value = htKeys[urlParameterName];
                        }
                        else
                        {
                            // The literal default "dbnull" (any casing) stands for a database NULL.
                            sp.Value = (defaultValue.ToLower() == "dbnull" ? DBNull.Value : (object)defaultValue);
                        }
                        break;
                }

                cmd.Parameters.Add(sp);
            }

            connection.Open();
            using (SqlDataReader dr = cmd.ExecuteReader())
            {
                Microsoft.WindowsAzure.StorageCredentialsAccountAndKey scaak = new Microsoft.WindowsAzure.StorageCredentialsAccountAndKey(azureAccount, azureSharedKey);
                Microsoft.WindowsAzure.CloudStorageAccount csa = new Microsoft.WindowsAzure.CloudStorageAccount(scaak, false);
                AzureDirectory azureDirectory = new AzureDirectory(csa, azureContainerName, new RAMDirectory());

                bool findexExists = false;
                try
                {
                    findexExists = IndexReader.IndexExists(azureDirectory);
                    if (findexExists && IndexWriter.IsLocked(azureDirectory))
                    {
                        azureDirectory.ClearLock("write.lock");
                    }
                }
                catch (Exception e)
                {
                    Trace.WriteLine(e.ToString());
                    return;
                }

                IndexWriter idxW = new IndexWriter(azureDirectory, new SnowballAnalyzer("English"), !findexExists, new IndexWriter.MaxFieldLength(1024));
                try
                {
                    idxW.SetRAMBufferSizeMB(10.0);
                    idxW.SetUseCompoundFile(false);
                    idxW.SetMaxMergeDocs(10000);
                    idxW.SetMergeFactor(100);

                    while (dr.Read())
                    {
                        StringBuilder olioSearch = new StringBuilder();
                        Document doc = new Document();

                        for (int i = 0; i < dataName.Length; i++)
                        {
                            if (isKeyFieldFlag[i])
                            {
                                // Key fields are stored lower-cased and un-analyzed; any document already
                                // indexed under the same key is deleted so the row replaces it.
                                string keyValue = dr[dataName[i]].ToString().ToLower();
                                NotifyCaller(string.Format("Processing {0}", keyValue));
                                idxW.DeleteDocuments(new Term(luceneName[i], keyValue));
                                doc.Add(new Field(luceneName[i], keyValue, Field.Store.YES, Field.Index.NOT_ANALYZED));
                            }
                            else
                            {
                                try
                                {
                                    doc.Add(new Field(luceneName[i], dr[dataName[i]].ToString(), fieldStore[i], indexType[i]));
                                    if (isIncludedInOlioSearchFlag[i])
                                    {
                                        olioSearch.AppendFormat("\r\n{0}", dr[dataName[i]].ToString());
                                    }
                                }
                                catch (Exception ex)
                                {
                                    // A single bad field is reported but does not abort the row.
                                    NotifyError(ex);
                                }
                            }
                        }

                        // Combined search field built from every field flagged for it.
                        if (olioSearch.ToString() != string.Empty && olioSearchFieldName != string.Empty)
                        {
                            doc.Add(new Field(olioSearchFieldName, olioSearch.ToString(), Field.Store.NO, Field.Index.ANALYZED));
                        }

                        idxW.AddDocument(doc);
                    }

                    idxW.Commit();
                }
                finally
                {
                    // Always release the writer and its write lock, also when a row fails.
                    idxW.Close();
                }
            }
        }
    }
    catch (Exception ex)
    {
        MOOPFramework.FrameworkUtility u = new MOOPFramework.FrameworkUtility(new Utility().ResolveDataConnection("sqlAzureConnection"));
        u.LogData("localhost", "quoteSearchLoader", "testing", string.Empty, string.Empty, "", "QueryError", ex.ToString(), u.nvc2XML(htKeys));
        NotifyError(new Exception("An error occured but it was logged for later review"));
    }
}
/// <summary>
/// Worker loop: polls the "searchindexqueue" queue forever and applies each queued message to the
/// Lucene index stored in the "LuceneStorage" blob container.
/// </summary>
/// <remarks>
/// Each message body is a number: zero rebuilds the whole index from <c>PointDataSummary.All()</c>,
/// a positive number adds or updates that summary, and a negative number deletes the summary with
/// the corresponding positive id. A message is skipped when the same id was already processed in
/// this pass at or after the message's insertion time. The queue is cleared once at startup.
/// Errors are traced and never end the loop; the thread sleeps for <c>_SleepInMinutes</c> minutes
/// between polls.
/// </remarks>
public override void Run()
{
    Trace.WriteLine(DateTime.Now.ToString() + " [INIT]");

    // Create azure account credentials
    CloudStorageAccount.SetConfigurationSettingPublisher((configName, configSetter) =>
    {
        configSetter(CloudSettingsResolver.GetConfigSetting(configName));
    });
    CloudStorageAccount cloudStorageAccount = CloudStorageAccount.FromConfigurationSetting("BlobStorageEndpoint");

    // Create reference to index queue
    CloudQueueClient client = cloudStorageAccount.CreateCloudQueueClient();
    CloudQueue queue = client.GetQueueReference("searchindexqueue");
    queue.CreateIfNotExist();
    queue.Clear();

    _Directory = new AzureDirectory(cloudStorageAccount, "LuceneStorage", new RAMDirectory());

    while (true)
    {
        try
        {
            int queueItems = queue.RetrieveApproximateMessageCount();
            Trace.WriteLine(DateTime.Now.ToString() + " [QUEUE_CHECK] " + queueItems + " items.");

            if (queueItems > 0)
            {
                // Get index writer
                if (IndexWriter.IsLocked(_Directory))
                {
                    _Directory.ClearLock("write.lock");
                }

                IndexWriter indexWriter = new IndexWriter(_Directory, new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29), false, new IndexWriter.MaxFieldLength(1000));
                indexWriter.SetRAMBufferSizeMB(10.0);
                indexWriter.SetUseCompoundFile(false);
                indexWriter.SetMaxMergeDocs(10000);
                indexWriter.SetMergeFactor(100);

                try
                {
                    // Remembers when each id was last processed so duplicated queue items can be skipped.
                    Dictionary<long, DateTime> updatedSummaries = new Dictionary<long, DateTime>();

                    // Each batch is materialized once so the queue result is not enumerated twice.
                    var msgs = queue.GetMessages(32).ToList();
                    while (msgs.Count > 0)
                    {
                        foreach (var msg in msgs)
                        {
                            Trace.WriteLine(DateTime.Now.ToString() + " [MSG] " + msg.AsString);
                            long summaryId = long.Parse(msg.AsString);
                            DateTime lastProcessed;

                            // zero = re-index the search index
                            // positive number = add/update PointDataSummary to index
                            // negative number = delete PointDataSummary from index
                            if (summaryId == 0)
                            {
                                Trace.WriteLine(DateTime.Now.ToString() + " [REINDEX]");

                                // Re-open the writer with create == true to start from an empty index.
                                indexWriter.Close();
                                indexWriter = new IndexWriter(_Directory, new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29), true, new IndexWriter.MaxFieldLength(1000));
                                indexWriter.SetRAMBufferSizeMB(10.0);
                                indexWriter.SetUseCompoundFile(false);
                                indexWriter.SetMaxMergeDocs(10000);
                                indexWriter.SetMergeFactor(100);

                                List<PointDataSummary> summaries = PointDataSummary.All().ToList();
                                updatedSummaries = new Dictionary<long, DateTime>();
                                foreach (var summary in summaries)
                                {
                                    Index(indexWriter, summary);
                                    Trace.WriteLine(DateTime.Now.ToString() + " [INDEX] " + summary.Id);

                                    // Record the summary that was just indexed. Keying by summaryId (always 0
                                    // in this branch) threw a duplicate-key exception on the second summary.
                                    updatedSummaries[summary.Id] = DateTime.UtcNow;
                                }
                            }
                            else if (updatedSummaries.TryGetValue(summaryId, out lastProcessed) && lastProcessed >= msg.InsertionTime)
                            {
                                Trace.WriteLine(DateTime.Now.ToString() + " [SKIPPED] " + msg.AsString);
                            }
                            else
                            {
                                if (summaryId < 0)
                                {
                                    indexWriter.DeleteDocuments(new Lucene.Net.Index.Term("point_id", (-summaryId).ToString()));
                                }
                                else
                                {
                                    var summary = PointDataSummary.SingleOrDefault(p => p.Id == summaryId);
                                    if (summary != null)
                                    {
                                        Index(indexWriter, summary);
                                        Trace.WriteLine(DateTime.Now.ToString() + " [INDEX] " + summary.Id);
                                    }
                                }

                                updatedSummaries[summaryId] = DateTime.UtcNow;
                            }

                            // Delete message from queue
                            queue.DeleteMessage(msg);
                        }

                        // Retrieve the next batch of messages to iterate
                        msgs = queue.GetMessages(32).ToList();
                    }
                }
                catch (Exception ex)
                {
                    Trace.WriteLine(ex.ToString());
                }
                finally
                {
                    indexWriter.Close();
                }
            }
        }
        catch (Exception ex)
        {
            Trace.WriteLine(DateTime.Now + " [ERROR] " + ex.Message);
        }
        finally
        {
            Trace.WriteLine(DateTime.Now.ToString() + " [QUEUE_CHECK] End");
            Thread.Sleep(_SleepInMinutes * 60 * 1000);
        }
    }
}