public virtual void TestAddSameDocTwice()
{
    // LUCENE-5367: this was a problem with the previous code, making sure it
    // works with the new code.
    Directory indexDir = NewDirectory(), taxoDir = NewDirectory();
    IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig facetsConfig = new FacetsConfig();
    Document doc = new Document();
    doc.Add(new FacetField("a", "b"));
    doc = facetsConfig.Build(taxoWriter, doc);
    // these two addDocument() used to fail
    indexWriter.AddDocument(doc);
    indexWriter.AddDocument(doc);
    IOUtils.Close(indexWriter, taxoWriter);

    DirectoryReader indexReader = DirectoryReader.Open(indexDir);
    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = NewSearcher(indexReader);
    FacetsCollector fc = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), fc);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, fc);
    FacetResult res = facets.GetTopChildren(10, "a");
    Assert.AreEqual(1, res.LabelValues.Length);
    Assert.AreEqual(2, res.LabelValues[0].Value);

    IOUtils.Close(indexReader, taxoReader);
    IOUtils.Close(indexDir, taxoDir);
}
public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir)
{
    int[] freq = new int[nTerms];
    Terms = new Term[nTerms];
    for (int i = 0; i < nTerms; i++)
    {
        int f = (nTerms + 1) - i; // make first terms less frequent
        freq[i] = (int)Math.Ceiling(Math.Pow(f, power));
        Terms[i] = new Term("f", char.ToString((char)('A' + i)));
    }
    IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE));
    for (int i = 0; i < nDocs; i++)
    {
        Document d = new Document();
        for (int j = 0; j < nTerms; j++)
        {
            if (Random().Next(freq[j]) == 0)
            {
                d.Add(NewStringField("f", Terms[j].Text(), Field.Store.NO));
                //System.out.println(d);
            }
        }
        iw.AddDocument(d);
    }
    iw.ForceMerge(1);
    iw.Dispose();
}
/// <summary>
/// Annotates the given sequence of <see cref="Document"/> objects by adding a <b>_highlight</b> field;
/// the <b>_highlight</b> field will contain the best matching text fragment from the <see cref="Document"/>
/// object's full-text field.
/// </summary>
/// <param name="hits">The sequence of <see cref="Document"/> objects.</param>
/// <param name="criteria">The search criteria that produced the hits.</param>
/// <returns>
/// The original sequence of Document objects, with a <b>_highlight</b> field added to each Document.
/// </returns>
public static IEnumerable<Document> GenerateHighlights(this IEnumerable<Document> hits, SearchCriteria criteria)
{
    if (hits == null)
        throw new ArgumentNullException(nameof(hits));
    if (criteria == null)
        throw new ArgumentNullException(nameof(criteria));
    if (String.IsNullOrWhiteSpace(criteria.Query))
        throw new ArgumentException("SearchCriteria.Query cannot be empty");

    var documents = hits.ToList();
    try
    {
        var indexDirectory = new RAMDirectory();
        var analyzer = new FullTextAnalyzer();
        var config = new IndexWriterConfig(analyzer).SetRAMBufferSizeMB(_ramBufferSizeMB);
        var writer = new IndexWriter(indexDirectory, config);

        BuidIndex(documents, writer);
        GenerateHighlights(documents, writer, criteria);

        writer.DeleteAll();
        writer.Commit();
        writer.Close();
        indexDirectory.Close();
    }
    catch (Exception ex)
    {
        _log.Error(ex);
    }
    return documents;
}
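// A hedged usage sketch for GenerateHighlights above (the `searchResults` variable and the
// SearchCriteria initializer are illustrative assumptions, not from the original source):
// each returned Document carries a "_highlight" field holding the best fragment for the query.
var criteria = new SearchCriteria { Query = "lucene highlighting" };
foreach (Document hit in searchResults.GenerateHighlights(criteria))
{
    Console.WriteLine(hit.Get("_highlight"));
}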
public virtual void TestCustomLockFactory()
{
    Directory dir = new MockDirectoryWrapper(Random(), new RAMDirectory());
    MockLockFactory lf = new MockLockFactory(this);
    dir.LockFactory = lf;

    // Lock prefix should have been set:
    Assert.IsTrue(lf.LockPrefixSet, "lock prefix was not set by the RAMDirectory");

    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

    // add 100 documents (so that commit lock is used)
    for (int i = 0; i < 100; i++)
    {
        AddDoc(writer);
    }

    // Both write lock and commit lock should have been created:
    Assert.AreEqual(1, lf.LocksCreated.Count, "# of unique locks created (after instantiating IndexWriter)");
    Assert.IsTrue(lf.MakeLockCount >= 1, "# calls to makeLock is 0 (after instantiating IndexWriter)");

    foreach (String lockName in lf.LocksCreated.Keys)
    {
        MockLockFactory.MockLock @lock = (MockLockFactory.MockLock)lf.LocksCreated[lockName];
        Assert.IsTrue(@lock.LockAttempts > 0, "# calls to Lock.obtain is 0 (after instantiating IndexWriter)");
    }

    writer.Dispose();
}
public static void Main(string[] args)
{
    if (args.Length < 3)
    {
        Console.Error.WriteLine("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
        Environment.Exit(1);
    }
    FSDirectory mergedIndex = FSDirectory.Open(new DirectoryInfo(args[0]));

    IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(Version.LUCENE_CURRENT, null)
        .SetOpenMode(OpenMode.CREATE));

    Directory[] indexes = new Directory[args.Length - 1];
    for (int i = 1; i < args.Length; i++)
    {
        indexes[i - 1] = FSDirectory.Open(new DirectoryInfo(args[i]));
    }

    Console.WriteLine("Merging...");
    writer.AddIndexes(indexes);

    Console.WriteLine("Full merge...");
    writer.ForceMerge(1);
    writer.Dispose();
    Console.WriteLine("Done.");
}
public virtual void TestMmapIndex()
{
    // sometimes the directory is not cleaned by rmDir, because on Windows it
    // may take some time until the files are finally dereferenced. So clean the
    // directory up front, or otherwise new IndexWriter will fail.
    DirectoryInfo dirPath = CreateTempDir("testLuceneMmap");
    RmDir(dirPath);
    MMapDirectory dir = new MMapDirectory(dirPath, null);

    // plan to add a set of useful stopwords, consider changing some of the
    // interior filters.
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    // TODO: something about lock timeouts and leftover locks.
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));
    writer.Commit();
    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);

    int num = AtLeast(1000);
    for (int dx = 0; dx < num; dx++)
    {
        string f = RandomField();
        Document doc = new Document();
        doc.Add(NewTextField("data", f, Field.Store.YES));
        writer.AddDocument(doc);
    }

    reader.Dispose();
    writer.Dispose();
    RmDir(dirPath);
}
public LogDocMergePolicy(IndexWriter writer) : base(writer)
{
    minMergeSize = DEFAULT_MIN_MERGE_DOCS;

    // maxMergeSize is never used by LogDocMergePolicy; set
    // it to Int64.MaxValue to disable it
    maxMergeSize = System.Int64.MaxValue;
}
protected override MergeThread GetMergeThread(IndexWriter writer, MergePolicy.OneMerge merge) { MergeThread thread = new MyMergeThread(this, writer, merge); thread.ThreadPriority = MergeThreadPriority; thread.SetDaemon(true); thread.Name = "MyMergeThread"; return thread; }
public LogByteSizeMergePolicy(IndexWriter writer) : base(writer)
{
    minMergeSize = (long) (DEFAULT_MIN_MERGE_MB * 1024 * 1024);
    //mgarski - the line below causes an overflow in .NET, resulting in a negative number...
    //maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB * 1024 * 1024);
    maxMergeSize = DEFAULT_MAX_MERGE_MB;
}
private void AddDocs2(IndexWriter writer, int numDocs) { for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(NewTextField("content", "bbb", Field.Store.NO)); writer.AddDocument(doc); } }
public IndexerThread(IndexWriter w, FacetsConfig config, TaxonomyWriter tw, ReferenceManager<SearcherAndTaxonomy> mgr, int ordLimit, AtomicBoolean stop) { this.w = w; this.config = config; this.tw = tw; this.mgr = mgr; this.ordLimit = ordLimit; this.stop = stop; }
private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
{
    LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
    logDocMergePolicy.NoCFSRatio = 0.0; // make sure we use plain files
    conf.SetMergePolicy(logDocMergePolicy);
    IndexWriter writer = new IndexWriter(dir, conf);
    return writer;
}
private void AddDocs3(IndexWriter writer, int numDocs) { for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(NewTextField("content", "ccc", Field.Store.NO)); doc.Add(NewStringField("id", "" + i, Field.Store.YES)); writer.AddDocument(doc); } }
// TODO: this should be setUp()....
public virtual void CreateDummySearcher()
{
    // Create a dummy index with nothing in it.
    // this could possibly fail if Lucene starts checking for docid ranges...
    d = NewDirectory();
    IndexWriter iw = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    iw.AddDocument(new Document());
    iw.Dispose();
    r = DirectoryReader.Open(d);
    s = NewSearcher(r);
}
public override void TearDown()
{
    Iw.Dispose();
    TestUtil.CheckIndex(Dir); // for some extra coverage, checkIndex before we forceMerge
    Iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
    IndexWriter iw = new IndexWriter(Dir, (IndexWriterConfig)Iwc.Clone());
    iw.ForceMerge(1);
    iw.Dispose();
    Dir.Dispose(); // just force a checkindex for now
    base.TearDown();
}
public void BeforeClassSorterUtilTest()
{
    // only read the values of the undeleted documents, since after addIndexes,
    // the deleted ones will be dropped from the index.
    Bits liveDocs = reader.LiveDocs;
    List<int> values = new List<int>();
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        if (liveDocs == null || liveDocs.Get(i))
        {
            values.Add(int.Parse(reader.Document(i).Get(ID_FIELD), CultureInfo.InvariantCulture));
        }
    }
    int idx = Random().nextInt(SORT.Length);
    Sort sorter = SORT[idx];
    if (idx == 1) // reverse doc sort
    {
        values.Reverse();
    }
    else
    {
        values.Sort();
        if (Random().nextBoolean())
        {
            sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type_e.LONG, true)); // descending
            values.Reverse();
        }
    }
    sortedValues = values.ToArray();
    if (VERBOSE)
    {
        Console.WriteLine("sortedValues: " + string.Join(", ", sortedValues));
        Console.WriteLine("Sorter: " + sorter);
    }

    Directory target = NewDirectory();
    using (IndexWriter writer = new IndexWriter(target, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)))
    {
        using (reader = SortingAtomicReader.Wrap(reader, sorter))
        {
            writer.AddIndexes(reader);
        }
    }
    dir.Dispose();

    // CheckIndex the target directory
    dir = target;
    TestUtil.CheckIndex(dir);

    // set reader for tests
    reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
    assertFalse("index should not have deletions", reader.HasDeletions);
}
/// <summary>Just do the merges in sequence. We do this
/// "synchronized" so that even if the application is using
/// multiple threads, only one merge may run at a time.
/// </summary>
public override void Merge(IndexWriter writer)
{
    lock (this)
    {
        while (true)
        {
            MergePolicy.OneMerge merge = writer.GetNextMerge();
            if (merge == null)
                break;
            writer.Merge(merge);
        }
    }
}
/// <summary>
/// Just do the merges in sequence. We do this
/// "synchronized" so that even if the application is using
/// multiple threads, only one merge may run at a time.
/// </summary>
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
{
    lock (this)
    {
        while (true)
        {
            MergePolicy.OneMerge merge = writer.NextMerge;
            if (merge == null)
            {
                break;
            }
            writer.Merge(merge);
        }
    }
}
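// A minimal sketch of how a merge scheduler like the ones above is wired in (assumptions:
// Lucene.Net 4.8 naming; `dir` and `analyzer` are hypothetical locals). The writer calls the
// scheduler's Merge(...) override whenever its merge policy finds pending merges.
IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
iwc.SetMergeScheduler(new SerialMergeScheduler()); // merges run one at a time on the calling thread
using (IndexWriter w = new IndexWriter(dir, iwc))
{
    w.ForceMerge(1); // drives the scheduler's Merge(writer, trigger, newMergesFound) synchronously
}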
public virtual void TestFailIfIndexWriterNotClosed() { MockDirectoryWrapper dir = NewMockDirectory(); IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); try { dir.Dispose(); Assert.Fail(); } catch (Exception expected) { Assert.IsTrue(expected.Message.Contains("there are still open locks")); } iw.Dispose(); dir.Dispose(); }
public virtual void TestBasic()
{
    HashSet<string> fileExtensions = new HashSet<string>();
    fileExtensions.Add(Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
    fileExtensions.Add(Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);

    MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(Random(), new RAMDirectory());
    primaryDir.CheckIndexOnClose = false; // only part of an index
    MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(Random(), new RAMDirectory());
    secondaryDir.CheckIndexOnClose = false; // only part of an index

    FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true);
    // for now we wire Lucene40Codec because we rely upon its specific impl
    bool oldValue = OLD_FORMAT_IMPERSONATION_IS_ACTIVE;
    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
    IndexWriter writer = new IndexWriter(fsd, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NewLogMergePolicy(false)).SetCodec(Codec.ForName("Lucene40")).SetUseCompoundFile(false));
    TestIndexWriterReader.CreateIndexNoClose(true, "ram", writer);
    IndexReader reader = DirectoryReader.Open(writer, true);
    Assert.AreEqual(100, reader.MaxDoc);
    writer.Commit();

    // we should see only fdx,fdt files here
    string[] files = primaryDir.ListAll();
    Assert.IsTrue(files.Length > 0);
    for (int x = 0; x < files.Length; x++)
    {
        string ext = FileSwitchDirectory.GetExtension(files[x]);
        Assert.IsTrue(fileExtensions.Contains(ext));
    }
    files = secondaryDir.ListAll();
    Assert.IsTrue(files.Length > 0);
    // we should not see fdx,fdt files here
    for (int x = 0; x < files.Length; x++)
    {
        string ext = FileSwitchDirectory.GetExtension(files[x]);
        Assert.IsFalse(fileExtensions.Contains(ext));
    }
    reader.Dispose();
    writer.Dispose();

    files = fsd.ListAll();
    for (int i = 0; i < files.Length; i++)
    {
        Assert.IsNotNull(files[i]);
    }
    fsd.Dispose();
    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = oldValue;
}
public override void SetUp()
{
    base.SetUp();
    Directory = NewDirectory();
    IndexWriter writer = new IndexWriter(Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
    //writer.setNoCFSRatio(0.0);
    //writer.infoStream = System.out;
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.Tokenized = false;
    customType.StoreTermVectors = true;
    for (int i = 0; i < NumDocs; i++)
    {
        Documents.Document doc = new Documents.Document();
        Field fld = NewField("field", English.IntToEnglish(i), customType);
        doc.Add(fld);
        writer.AddDocument(doc);
    }
    writer.Dispose();
}
public ThumbnailIndexer(IndexPreferences indexPreferences) { this.indexPreferences = indexPreferences; if (!System.IO.Directory.Exists(Preferences.Instance.ThumbIndexFolder)) { logger.Info("Creating thumbs folder: '{0}'", Preferences.Instance.ThumbIndexFolder); System.IO.Directory.CreateDirectory(Preferences.Instance.ThumbIndexFolder); } var config = new IndexWriterConfig(FindAPhotoAnalyzers.IndexVersion, FindAPhotoAnalyzers.ForIndexing()); writer = new IndexWriter( FSDirectory.open(new java.io.File(Preferences.Instance.ThumbIndexFolder)), config); writer.commit(); searchManager = new SearchManager(writer.getDirectory()); tasks.Add(Task.Run(DequeueFiles)); tasks.Add(Task.Run(DequeueFiles)); tasks.Add(Task.Run(CommitTask)); }
public override void SetUp()
{
    base.SetUp();
    //IndexDir = CreateTempDir("RAMDirIndex");
    string tempDir = Path.GetTempPath();
    if (tempDir == null)
        throw new IOException("java.io.tmpdir undefined, cannot run test");
    IndexDir = new DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex"));

    Directory dir = NewFSDirectory(IndexDir);
    IndexWriter writer = new IndexWriter(dir, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

    // add some documents
    Document doc = null;
    for (int i = 0; i < DocsToAdd; i++)
    {
        doc = new Document();
        doc.Add(NewStringField("content", English.IntToEnglish(i).Trim(), Field.Store.YES));
        writer.AddDocument(doc);
    }
    Assert.AreEqual(DocsToAdd, writer.MaxDoc);
    writer.Dispose();
    dir.Dispose();
}
public DocumentIndexer() { if (!System.IO.Directory.Exists(Preferences.Instance.MainIndexFolder)) { logger.Info("Creating main index folder: '{0}'", Preferences.Instance.MainIndexFolder); System.IO.Directory.CreateDirectory(Preferences.Instance.MainIndexFolder); } else { logger.Info("Updating index at '{0}'", Preferences.Instance.MainIndexFolder); } var config = new IndexWriterConfig(FindAPhotoAnalyzers.IndexVersion, FindAPhotoAnalyzers.ForIndexing()); mainWriter = new IndexWriter( FSDirectory.open(new java.io.File(Preferences.Instance.MainIndexFolder)), config); taxonomyWriter = new DirectoryTaxonomyWriter( FSDirectory.open(new java.io.File(Preferences.Instance.FacetIndexFolder)), IndexWriterConfig.OpenMode.CREATE_OR_APPEND); indexPreferences = new IndexPreferences(mainWriter); thumbnailIndexer = new ThumbnailIndexer(indexPreferences); startTime = DateTime.Now; }
public void SetUp() { dir = new RAMDirectory(); writer = new IndexWriter(dir, new IndexWriterConfig(Util.LuceneVersion.LUCENE_48, new StandardAnalyzer(Util.LuceneVersion.LUCENE_48))); }
private void button2_Click(object sender, EventArgs e) // show the abstract for DBpedia
{
    if (textBox2.Text.Equals(""))
    {
        MessageBox.Show("Enter the path to the file!");
    }
    else
    {
        if (textBox3.Text.Equals(""))
        {
            MessageBox.Show("Fill in the field: enter the abstract to search for!");
        }
        else
        {
            // if the index has not been built from my file yet
            if (!MamIndexDBPedia)
            {
                directoryDBPedia = FSDirectory.Open(new DirectoryInfo(Environment.CurrentDirectory + "\\DBPediaIndex"));
                analyzerDBPedia = new StandardAnalyzer(Version.LUCENE_29);
                IndexWriter writer = new IndexWriter(directoryDBPedia, analyzerDBPedia, true, IndexWriter.MaxFieldLength.UNLIMITED);
                string riadok;
                cestaSuborDBPedia = textBox2.Text;
                System.IO.StreamReader subor = new System.IO.StreamReader(cestaSuborDBPedia);
                while ((riadok = subor.ReadLine()) != null)
                {
                    string[] poleAbstrakt = riadok.Split(new string[] { ";|?" }, StringSplitOptions.None);
                    // index every line, each previously split into a title and an abstract
                    Document doc = new Document();
                    doc.Add(new Field("Nadpis", poleAbstrakt[0], Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Abstrakt", poleAbstrakt[1], Field.Store.YES, Field.Index.NO));
                    writer.AddDocument(doc);
                }
                // close and flush the writer and the rest of the plumbing
                MamIndexDBPedia = true;
                writer.Optimize();
                //writer.Commit();
                writer.Close();
                MessageBox.Show("The file was indexed successfully!");
            }
            if (MamIndexDBPedia)
            {
                // open the directory for reading the index and set up the searcher
                IndexReader indexReader = IndexReader.Open(directoryDBPedia, true);
                Searcher indexSearch = new IndexSearcher(indexReader);
                // create the query parser itself: define the version and the field to search in, and hand over the query entered on the input
                QueryParser queryParser = new QueryParser(Version.LUCENE_29, "Nadpis", analyzerDBPedia);
                Query query = queryParser.Parse(textBox3.Text);
                // run the search itself and fill the results structure
                TopDocs resultDocs = indexSearch.Search(query, 20);
                // the search hits that were found
                var hits = resultDocs.scoreDocs;
                richTextBox2.Text = "";
                abstraktDBpedia = "";
                textBox5.Text = resultDocs.totalHits.ToString();
                if (resultDocs.totalHits > 0)
                {
                    foreach (var hit in hits)
                    {
                        var documentFromSearcher = indexSearch.Doc(hit.doc);
                        richTextBox2.Text += documentFromSearcher.Get("Nadpis") + ": " + documentFromSearcher.Get("Abstrakt") + "\n";
                        abstraktDBpedia += documentFromSearcher.Get("Abstrakt") + " ";
                    }
                }
                else
                {
                    richTextBox2.Text = "No match was found";
                }
            }
        }
    }
}
public void IndexSelectedReviews(ISet<string> reviewIds)
{
    StandardAnalyzer analyzer = null;
    IndexWriter writer = null;
    try
    {
        analyzer = new StandardAnalyzer(Version.LUCENE_30);
        writer = new IndexWriter(_dirLocation, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
        var tableManager = new TableManager();
        var reviewList = tableManager.GetReviewsById(GenerateListFromSet(reviewIds));
        foreach (var id in reviewIds)
        {
            if (reviewList.ContainsKey(id))
            {
                Trace.TraceInformation("Adding {0} to the index", id);
                var reviewEntity = reviewList[id];

                // delete entry if exists
                var searchQuery = new TermQuery(new Term(Constants.Constants.Field_Id, id));
                writer.DeleteDocuments(searchQuery);

                // add to index again
                var doc = new Document();
                doc.Add(new Field(Constants.Constants.Field_Id, reviewEntity.ReviewId, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_EntityType, Constants.Constants.Field_EntityType_Reviews, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_ReviewerName, reviewEntity.ReviewerName, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_EntityType_ReviewText, reviewEntity.Review, Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            else
            {
                Trace.TraceWarning("review {0} not present in db", id);
            }
        }
    }
    catch (Exception err)
    {
        Trace.TraceError("Failed to build index {0}", err);
    }
    finally
    {
        if (analyzer != null) { analyzer.Close(); }
        if (writer != null) { writer.Dispose(); }
    }
}
private void CleanWriter() { indexWriter?.Dispose(); indexWriter = null; }
public void Write()
{
    //var indexDir = new System.IO.DirectoryInfo(System.IO.Directory.GetCurrentDirectory() + "/abc");
    //var returnIndexDir = FSDirectory.Open(indexDir);
    ramDir = new RAMDirectory();
    //var c = new CharArraySet(LuceneVersion.LUCENE_48, 1, true);
    //c.Add(' ');
    //var iwc = new IndexWriterConfig(LuceneVersion.LUCENE_48,
    //    new StandardAnalyzer(LuceneVersion.LUCENE_48, c));
    var indexwRiteCfg = new IndexWriterConfig(LuceneVersion.LUCENE_48, new ClassicAnalyzer(LuceneVersion.LUCENE_48));
    var writer = new IndexWriter(ramDir, indexwRiteCfg);
    using (var con = new SqlConnection("server=.;database=testdb;uid=sa;pwd=1;"))
    {
        var list = con.Query<dynamic>("SELECT FMEFeeItemID, FName,FPy FROM t_bx_feeitem");
        foreach (var item in list)
        {
            var doc = new Document();
            var fname = item.FName == null ? "" : item.FName.ToString();
            doc.Add(new Field("fname", fname, new FieldType() { IsIndexed = true, IsStored = true }));
            string fpy = item.FPy == null ? "" : item.FPy?.ToString();
            var newpy = new StringBuilder();
            for (int i = 0; i < fpy.Length; i++)
            {
                if (_cxDic.ContainsKey(fpy[i]))
                {
                    newpy.Append(_cxDic[fpy[i]]);
                }
                else
                {
                    newpy.Append(fpy[i]);
                }
            }
            doc.Add(new Field("fpy", newpy.ToString(), new FieldType() { IsIndexed = true, IsStored = false, }));
            doc.Add(new Field("py", fpy, new FieldType() { IsIndexed = false, IsStored = true, }));
            var fmefeeitemid = item.FMEFeeItemID == null ? "" : item.FMEFeeItemID?.ToString();
            doc.Add(new Field("fmefeeitemid", fmefeeitemid, new FieldType() { IsIndexed = false, IsStored = true }));
            writer.AddDocument(doc);
        }
        writer.Flush(true, true);
        writer.Commit();
    }
}
public virtual void TestNrt()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    // Don't allow tiny maxBufferedDocs; it can make this
    // test too slow:
    iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

    // MockRandom/AlcoholicMergePolicy are too slow:
    TieredMergePolicy tmp = new TieredMergePolicy();
    tmp.FloorSegmentMB = .001;
    iwc.SetMergePolicy(tmp);

    IndexWriter w = new IndexWriter(dir, iwc);
    var tw = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("field", true);
    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping:
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);
    var mgr = new SearcherTaxonomyManager(w, true, null, tw);
    var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

    reopener.Name = "reopener";
    reopener.Start();

    indexer.Name = "indexer";
    indexer.Start();

    try
    {
        while (!stop.Get())
        {
            SearcherAndTaxonomy pair = mgr.Acquire();
            try
            {
                //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                FacetsCollector sfc = new FacetsCollector();
                pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                Facets facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                FacetResult result = facets.GetTopChildren(10, "field");
                if (pair.Searcher.IndexReader.NumDocs > 0)
                {
                    //System.out.println(pair.taxonomyReader.getSize());
                    Assert.True(result.ChildCount > 0);
                    Assert.True(result.LabelValues.Length > 0);
                }
                //if (VERBOSE) {
                //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                //}
            }
            finally
            {
                mgr.Release(pair);
            }
        }
    }
    finally
    {
        indexer.Join();
        reopener.Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }

    IOUtils.Dispose(mgr, tw, w, taxoDir, dir);
}
private async Task UpdateIndicesWithDeletedDocuments(DateTimeOffset lastSyncPoint, DateTimeOffset currentSyncPoint, int numberOfDeletes, IndexWriter indexWriter, CancellationToken cancellationToken)
{
    using (var dbConnection = await _SQLservice.GetConnection(cancellationToken))
    {
        var startRow = 1;
        var dbCountCommand = @"SELECT Id from [dbo].[Test_Data] WHERE DeletedAt >= @lastSyncPoint AND DeletedAt < @currentSyncPoint AND DeletedAt IS NOT NULL ORDER BY Id ASC OFFSET @StartRow - 1 ROWS FETCH NEXT @RowsPerPage ROWS ONLY ";
        while (numberOfDeletes >= startRow)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var cmd = new SqlCommand(dbCountCommand, dbConnection);
            cmd.Parameters.Add("@lastSyncPoint", System.Data.SqlDbType.DateTimeOffset);
            cmd.Parameters["@lastSyncPoint"].Value = lastSyncPoint;
            cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
            cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;
            cmd.Parameters.Add("@StartRow", System.Data.SqlDbType.Int);
            cmd.Parameters["@StartRow"].Value = startRow;
            cmd.Parameters.Add("@RowsPerPage", System.Data.SqlDbType.Int);
            cmd.Parameters["@RowsPerPage"].Value = PageSize;
            try
            {
                using (var reader = await _SQLservice.ExecuteReaderWithRetryAsync(cmd, System.Data.CommandBehavior.SequentialAccess, cancellationToken))
                {
                    while (await reader.ReadAsync())
                    {
                        var document_id = await reader.GetFieldValueAsync<int>(0);
                        indexWriter.DeleteDocuments(new Term("doc_id", document_id.ToString()));
                        startRow++;
                    }
                }
                _logger.LogDebug("Processed {0} records (of {1} total) for delete", (startRow - 1), numberOfDeletes);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "unexpected failure during index delete");
                throw;
            }
        }
        _logger.LogInformation("Processed {0} records for deletion from FTS indices. Completed.", (startRow - 1));
    }
}
public Grouping()
{
    #region Init
    Directory directory = new RAMDirectory();
    var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
    var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
    IndexWriter indexWriter = new IndexWriter(directory, config);
    #endregion

    #region Add Docs to Index

    #region Setup Group End Field
    FieldType groupEndFieldType = new FieldType();
    groupEndFieldType.IsStored = false;
    groupEndFieldType.IsTokenized = false;
    groupEndFieldType.IsIndexed = true;
    groupEndFieldType.IndexOptions = IndexOptions.DOCS_ONLY;
    groupEndFieldType.OmitNorms = true;
    Field groupEndField = new Field("groupEnd", "x", groupEndFieldType);
    #endregion

    List<Document> documentList = new List<Document>();

    Document doc = new Document();
    doc.Add(new StringField("BookId", "B1", Field.Store.YES));
    doc.Add(new StringField("Category", "Cat 1", Field.Store.YES));
    doc.Add(new Int32Field("Repetition", 1, Field.Store.YES));
    documentList.Add(doc);

    doc = new Document();
    doc.Add(new StringField("BookId", "B2", Field.Store.YES));
    doc.Add(new StringField("Category", "Cat 1", Field.Store.YES));
    doc.Add(new Int32Field("Repetition", 1, Field.Store.YES));
    documentList.Add(doc);
    doc.Add(groupEndField);
    indexWriter.AddDocuments(documentList);

    documentList = new List<Document>();

    doc = new Document();
    doc.Add(new StringField("BookId", "B3", Field.Store.YES));
    doc.Add(new StringField("Category", "Cat 2", Field.Store.YES));
    doc.Add(new Int32Field("Repetition", 2, Field.Store.YES));
    documentList.Add(doc);
    doc.Add(groupEndField);
    indexWriter.AddDocuments(documentList);

    indexWriter.Dispose();
    #endregion

    //BasicFindRepByNumericRange(directory);
    //LookupGroupsByIntAlt(directory);
    TwoPassGroupingSearch(directory);

    directory.Dispose();
}
public virtual void TestDirectory()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    var tw = new DirectoryTaxonomyWriter(taxoDir);
    // first empty commit
    w.Commit();
    tw.Commit();
    var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);
    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("field", true);
    AtomicBoolean stop = new AtomicBoolean();

    // How many unique facets to index before stopping:
    int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

    var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);
    indexer.Start();
    try
    {
        while (!stop.Get())
        {
            SearcherAndTaxonomy pair = mgr.Acquire();
            try
            {
                //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                FacetsCollector sfc = new FacetsCollector();
                pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                Facets facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                FacetResult result = facets.GetTopChildren(10, "field");
                if (pair.Searcher.IndexReader.NumDocs > 0)
                {
                    //System.out.println(pair.taxonomyReader.getSize());
                    Assert.True(result.ChildCount > 0);
                    Assert.True(result.LabelValues.Length > 0);
                }
                //if (VERBOSE) {
                //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                //}
            }
            finally
            {
                mgr.Release(pair);
            }
        }
    }
    finally
    {
        indexer.Join();
    }
    if (VERBOSE)
    {
        Console.WriteLine("TEST: now stop");
    }
    IOUtils.Dispose(mgr, tw, w, taxoDir, indexDir);
}
private static void IndexDocs2(IndexWriter writer, string path)
{
    if (SystemIO.Directory.Exists(path))
    {
        TBFileIndexing.AppendText(Environment.NewLine + path);
        SystemIO.DirectoryInfo dirInfo = new SystemIO.DirectoryInfo(path);
        string folderExclude = System.Configuration.ConfigurationManager.AppSettings["FolderExclude"].ToLower();
        if (folderExclude.Contains(dirInfo.Name.ToLower()))
        {
            return;
        }
        foreach (SystemIO.FileInfo file in dirInfo.GetFiles())
        {
            IndexDocs(writer, file.FullName);
        }
        foreach (SystemIO.DirectoryInfo dir in dirInfo.GetDirectories())
        {
            IndexDocs(writer, dir.FullName);
        }
    }
    else
    {
        bool ignoreIndex = false;
        //TBFileIndexing.AppendText(Environment.NewLine + path);
        if (TBFileIndexing.Text.Length > 5000)
        {
            TBFileIndexing.Text = "";
        }
        string fileExt = SystemIO.Path.GetExtension(path);
        if (fileExt != null)
        {
            fileExt = fileExt.ToLower();
        }
        string extInclude = System.Configuration.ConfigurationManager.AppSettings["ExtInclude"];
        if (!string.IsNullOrEmpty(extInclude))
        {
            if (!extInclude.Contains(fileExt))
            {
                TBFileIndexing.AppendText("... not include file extension - ignored");
                ignoreIndex = true;
            }
        }
        else
        {
            string extExclude = System.Configuration.ConfigurationManager.AppSettings["ExtExclude"];
            if (extExclude.Contains(fileExt))
            {
                TBFileIndexing.AppendText("... exclude file extension - ignored");
                ignoreIndex = true;
            }
        }
        SystemIO.FileInfo fileInfo = new System.IO.FileInfo(path);
        if (fileInfo.Length > 5000000) //~5MB
        {
            TBFileIndexing.AppendText("... big file - ignored");
            ignoreIndex = true;
        }
        try
        {
            Document doc = new Document();
            Field pathField = new Field("path", path.Replace(rootPath + "\\", ""), Field.Store.YES, Field.Index.NO);
            doc.Add(pathField);
            //doc.Add(new Field("modified",
            //    new SystemIO.FileInfo(path).LastWriteTime.ToString("yyyyMMddHHmmss"), Field.Store.YES, Field.Index.NO));
            string textContent = string.Empty;
            if (!ignoreIndex) // only read files that are not ignored
            {
                textContent = ReaderFactory.GetText(path);
            }
            textContent = SystemIO.Path.GetFileNameWithoutExtension(path) + Environment.NewLine + textContent;
            textContent = PrepareForIndex(textContent, fileExt);
            Field contentField = new Field("content", textContent, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            doc.Add(contentField);
            writer.AddDocument(doc);
        }
        catch (Exception ex)
        {
            TBFileIndexing.AppendText("...read failed: " + ex.Message);
        }
    }
}
/// <summary>
/// Assigns the additional parameters.
/// </summary>
/// <param name = "settings"></param>
public override void AssignSettings(Dictionary<string, string> settings)
{
    _checkIndexes = bool.Parse(settings["CheckIndexes"]);
    bool autoCommit = bool.Parse(settings["AutoCommit"]);
    string luceneDotNetIndexDirectory = settings["LuceneDotNetIndexDirectory"];
    string currentCrawlDirectory = Path.Combine(luceneDotNetIndexDirectory, "CurrentCrawl");

    //create required directories...
    if (!Directory.Exists(luceneDotNetIndexDirectory))
    {
        Directory.CreateDirectory(luceneDotNetIndexDirectory);
    }
    if (!Directory.Exists(currentCrawlDirectory))
    {
        Directory.CreateDirectory(currentCrawlDirectory);
    }

    //create lucene.net directories...
    _luceneDotNetIndexDirectory = FSDirectory.Open(new DirectoryInfo(luceneDotNetIndexDirectory));
    _currentCrawlDirectory = FSDirectory.Open(new DirectoryInfo(currentCrawlDirectory));
    _standardAnalyzer = new StandardAnalyzer();

    //delete the lock - a crawl may have been prematurely terminated, likely by the user's election. write.lock prevents us from writing to the index on subsequent crawls.
    if (File.Exists(Path.Combine(luceneDotNetIndexDirectory, "write.lock")))
    {
        File.Delete(Path.Combine(luceneDotNetIndexDirectory, "write.lock"));
    }

    //delete the lock - a crawl may have been prematurely terminated, likely by the user's election. write.lock prevents us from writing to the index on subsequent crawls.
    if (File.Exists(Path.Combine(currentCrawlDirectory, "write.lock")))
    {
        File.Delete(Path.Combine(currentCrawlDirectory, "write.lock"));
    }

    ManageIndexes();
    TearDownIndexWriter();
    _indexFiles = bool.Parse(settings["IndexFiles"]);
    _indexImages = bool.Parse(settings["IndexImages"]);
    _indexWebPages = bool.Parse(settings["IndexWebPages"]);

    //check to see if we have requested to rebuild the index.
    if (bool.Parse(settings["RebuildIndexOnLoad"]))
    {
        string tempDirectory = Path.Combine(luceneDotNetIndexDirectory, "Temp");
        int fileIDLowerBound = int.Parse(settings["FileIDLowerBound"]);
        int fileIDUpperBound = int.Parse(settings["FileIDUpperBound"]);
        int imageIDLowerBound = int.Parse(settings["ImageIDLowerBound"]);
        int imageIDUpperBound = int.Parse(settings["ImageIDUpperBound"]);
        int webPageIDLowerBound = int.Parse(settings["WebPageIDLowerBound"]);
        int webPageIDUpperBound = int.Parse(settings["WebPageIDUpperBound"]);
        RebuildIndexes(tempDirectory, fileIDLowerBound, fileIDUpperBound, imageIDLowerBound, imageIDUpperBound, webPageIDLowerBound, webPageIDUpperBound);
        TearDownIndexWriter();
    }

    //switch back to the _current
    if (autoCommit)
    {
        //NOTE: autoCommit was disabled in Lucene.net 2.4. The threads now check when to Commit();
        _autoCommit = true;
        _autoCommitLock = new object();
        _lastCommitDateTime = DateTime.Now;
        //_indexWriter = new IndexWriter(_luceneDotNetIndexDirectory, true, _standardAnalyzer, false);
        _indexWriter = new IndexWriter(_luceneDotNetIndexDirectory, _standardAnalyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);
    }
    else
    {
        _indexWriter = new IndexWriter(_currentCrawlDirectory, _standardAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    }
    SetIndexWriterDefaults();
    _indexSearcher = new IndexSearcher(_luceneDotNetIndexDirectory, true);
}
/// <summary>
/// Indexes the data from the given <see cref="IDictionary"/>. </summary>
/// <param name="dict"> Dictionary to index </param>
/// <param name="config"> <see cref="IndexWriterConfig"/> to use </param>
/// <param name="fullMerge"> whether or not the spellcheck index should be fully merged </param>
/// <exception cref="ObjectDisposedException"> if the <see cref="SpellChecker"/> is already disposed </exception>
/// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception>
public void IndexDictionary(IDictionary dict, IndexWriterConfig config, bool fullMerge)
{
    lock (modifyCurrentIndexLock)
    {
        EnsureOpen();
        Directory dir = this.spellIndex;
        using (var writer = new IndexWriter(dir, config))
        {
            IndexSearcher indexSearcher = ObtainSearcher();
            IList<TermsEnum> termsEnums = new List<TermsEnum>();

            IndexReader reader = searcher.IndexReader;
            if (reader.MaxDoc > 0)
            {
                foreach (AtomicReaderContext ctx in reader.Leaves)
                {
                    Terms terms = ctx.AtomicReader.GetTerms(F_WORD);
                    if (terms != null)
                    {
                        termsEnums.Add(terms.GetIterator(null));
                    }
                }
            }

            bool isEmpty = termsEnums.Count == 0;

            try
            {
                IBytesRefIterator iter = dict.GetEntryIterator();
                BytesRef currentTerm;

                while ((currentTerm = iter.Next()) != null)
                {
                    string word = currentTerm.Utf8ToString();
                    int len = word.Length;
                    if (len < 3)
                    {
                        continue; // too short we bail but "too long" is fine...
                    }

                    if (!isEmpty)
                    {
                        foreach (TermsEnum te in termsEnums)
                        {
                            if (te.SeekExact(currentTerm))
                            {
                                goto termsContinue;
                            }
                        }
                    }

                    // ok index the word
                    var doc = CreateDocument(word, GetMin(len), GetMax(len));
                    writer.AddDocument(doc);
                    termsContinue: ;
                }
            }
            finally
            {
                ReleaseSearcher(indexSearcher);
            }
            if (fullMerge)
            {
                writer.ForceMerge(1);
            }
        }
        // TODO: this isn't that great, maybe in the future SpellChecker should take
        // IWC in its ctor / keep its writer open?

        // also re-open the spell index to see our own changes when the next suggestion
        // is fetched:
        SwapSearcher(dir);
    }
}
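// A hedged usage sketch for IndexDictionary above (assumptions: Lucene.Net 4.8; `mainDir` is an
// existing index directory with a "content" field; the spell index path is made up for illustration):
using (var spellDir = FSDirectory.Open(new DirectoryInfo("spellchecker-index")))
using (var spell = new SpellChecker(spellDir))
using (var reader = DirectoryReader.Open(mainDir))
{
    var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48));
    // LuceneDictionary exposes the terms of one field as the spelling dictionary
    spell.IndexDictionary(new LuceneDictionary(reader, "content"), config, fullMerge: true);
    string[] suggestions = spell.SuggestSimilar("exmaple", 5);
}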
public virtual void TestKnownSetOfDocuments()
{
    System.String test1 = "eating chocolate in a computer lab"; //6 terms
    System.String test2 = "computer in a computer lab"; //5 terms
    System.String test3 = "a chocolate lab grows old"; //5 terms
    System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
    System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
    test4Map["chocolate"] = 3;
    test4Map["lab"] = 2;
    test4Map["eating"] = 1;
    test4Map["computer"] = 1;
    test4Map["with"] = 1;
    test4Map["a"] = 1;
    test4Map["colored"] = 1;
    test4Map["in"] = 1;
    test4Map["an"] = 1;
    test4Map["old"] = 1;

    Document testDoc1 = new Document();
    SetupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    SetupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    SetupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    SetupDoc(testDoc4, test4);

    Directory dir = new MockRAMDirectory();

    try
    {
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        Assert.IsTrue(writer != null);
        writer.AddDocument(testDoc1);
        writer.AddDocument(testDoc2);
        writer.AddDocument(testDoc3);
        writer.AddDocument(testDoc4);
        writer.Close();
        IndexSearcher knownSearcher = new IndexSearcher(dir);
        TermEnum termEnum = knownSearcher.reader_ForNUnit.Terms();
        TermDocs termDocs = knownSearcher.reader_ForNUnit.TermDocs();
        //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);

        Similarity sim = knownSearcher.GetSimilarity();
        while (termEnum.Next() == true)
        {
            Term term = termEnum.Term();
            //System.out.println("Term: " + term);
            termDocs.Seek(term);
            while (termDocs.Next())
            {
                int docId = termDocs.Doc();
                int freq = termDocs.Freq();
                //System.out.println("Doc Id: " + docId + " freq " + freq);
                TermFreqVector vector = knownSearcher.reader_ForNUnit.GetTermFreqVector(docId, "field");
                float tf = sim.Tf(freq);
                float idf = sim.Idf(term, knownSearcher);
                //float qNorm = sim.queryNorm()
                //This is fine since we don't have stop words
                float lNorm = sim.LengthNorm("field", vector.GetTerms().Length);
                //float coord = sim.coord()
                //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
                Assert.IsTrue(vector != null);
                System.String[] vTerms = vector.GetTerms();
                int[] freqs = vector.GetTermFrequencies();
                for (int i = 0; i < vTerms.Length; i++)
                {
                    if (term.Text().Equals(vTerms[i]))
                    {
                        Assert.IsTrue(freqs[i] == freq);
                    }
                }
            }
            //System.out.println("--------");
        }
        Query query = new TermQuery(new Term("field", "chocolate"));
        ScoreDoc[] hits = knownSearcher.Search(query, null, 1000).scoreDocs;
        //doc 3 should be the first hit b/c it is the shortest match
        Assert.IsTrue(hits.Length == 3);
        float score = hits[0].score;
        /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
         * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
         * System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
         * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
         * System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
         * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
        Assert.IsTrue(hits[0].doc == 2);
        Assert.IsTrue(hits[1].doc == 3);
        Assert.IsTrue(hits[2].doc == 0);
        TermFreqVector vector2 = knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, "field");
        Assert.IsTrue(vector2 != null);
        //System.out.println("Vector: " + vector);
        System.String[] terms = vector2.GetTerms();
        int[] freqs2 = vector2.GetTermFrequencies();
        Assert.IsTrue(terms != null && terms.Length == 10);
        for (int i = 0; i < terms.Length; i++)
        {
            System.String term = terms[i];
            //System.out.println("Term: " + term);
            int freq = freqs2[i];
            Assert.IsTrue(test4.IndexOf(term) != -1);
            System.Int32 freqInt = -1;
            try
            {
                freqInt = (System.Int32)test4Map[term];
            }
            catch (Exception)
            {
                Assert.IsTrue(false);
            }
            Assert.IsTrue(freqInt == freq);
        }
        SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
        knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, mapper);
        System.Collections.Generic.SortedDictionary<object, object> vectorEntrySet = mapper.GetTermVectorEntrySet();
        Assert.IsTrue(vectorEntrySet.Count == 10, "mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.Count + " is not: " + 10);
        TermVectorEntry last = null;
        foreach (TermVectorEntry tve in vectorEntrySet.Keys)
        {
            if (tve != null && last != null)
            {
                Assert.IsTrue(last.GetFrequency() >= tve.GetFrequency(), "terms are not properly sorted");
                System.Int32 expectedFreq = (System.Int32)test4Map[tve.GetTerm()];
                //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
                Assert.IsTrue(tve.GetFrequency() == 2 * expectedFreq, "Frequency is not correct:");
            }
            last = tve;
        }
        FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
        knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, fieldMapper);
        System.Collections.IDictionary map = fieldMapper.GetFieldToTerms();
        Assert.IsTrue(map.Count == 2, "map Size: " + map.Count + " is not: " + 2);
        vectorEntrySet = (System.Collections.Generic.SortedDictionary<Object, Object>)map["field"];
        Assert.IsTrue(vectorEntrySet != null, "vectorEntrySet is null and it shouldn't be");
        Assert.IsTrue(vectorEntrySet.Count == 10, "vectorEntrySet Size: " + vectorEntrySet.Count + " is not: " + 10);
        knownSearcher.Close();
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
public void IndexSelectedMovies(ISet<string> movieIds)
{
    StandardAnalyzer analyzer = null;
    IndexWriter writer = null;
    try
    {
        analyzer = new StandardAnalyzer(Version.LUCENE_30);
        writer = new IndexWriter(_dirLocation, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
        var tableManager = new TableManager();
        var movieList = tableManager.GetMoviesByid(GenerateListFromSet(movieIds));
        foreach (var id in movieIds)
        {
            if (movieList.ContainsKey(id))
            {
                Trace.TraceInformation("Adding {0} to the index", id);
                var movieEntity = movieList[id];

                // delete entry if exists
                var searchQuery = new TermQuery(new Term(Constants.Constants.Field_Id, id));
                writer.DeleteDocuments(searchQuery);

                // add to index again
                var doc = new Document();
                doc.Add(new Field(Constants.Constants.Field_Id, movieEntity.MovieId, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_Name, movieEntity.Name, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_AltNames, movieEntity.AltNames, Field.Store.NO, Field.Index.ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_Actors, movieEntity.Actors, Field.Store.NO, Field.Index.ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_Directors, movieEntity.Directors, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_MusicDirectors, movieEntity.MusicDirectors, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_Name, movieEntity.Name, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_Producers, movieEntity.Producers, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Constants.Constants.Field_MovieSynopsis, movieEntity.Synopsis, Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            else
            {
                Trace.TraceWarning("movie {0} not present in db", id);
            }
        }
    }
    catch (Exception err)
    {
        Trace.TraceError("Failed to build index {0}", err);
    }
    finally
    {
        if (analyzer != null) { analyzer.Close(); }
        if (writer != null) { writer.Dispose(); }
    }
}
/// <summary>
/// Open the <see cref="DirectoryReader"/> from this <see cref="IndexWriter"/>.
/// </summary>
protected virtual DirectoryReader OpenIndexReader(IndexWriter writer)
{
    return DirectoryReader.Open(writer, false);
}
public void DeleteContent(IndexWriter indexWriter, Guid id) { indexWriter.deleteDocuments(new Term(SearchFieldName.Id, id.ToFieldValue())); }
public async Task<RebuildIndicesResponse> Handle(RebuildIndicesRequest request, CancellationToken cancellationToken)
{
    _logger.LogDebug("RebuildIndicesResponseHandler started.");
    cancellationToken.ThrowIfCancellationRequested();
    IndexWriter writer = null;
    Lucene.Net.Store.Azure.AzureDirectory azureDirectory = null;
    DateTimeOffset lastSyncPoint = DateTimeOffset.MinValue;
    DateTimeOffset currentSyncPoint = DateTimeOffset.Now;
    int? updatedCount = null;
    int? deletedCount = null;
    try
    {
        // Ensures index backwards compatibility
        var AppLuceneVersion = LuceneVersion.LUCENE_48;

        //Azure configuration
        var accountSAS = new Microsoft.Azure.Storage.Auth.StorageCredentials(AzureLuceneConfiguration.SASToken);
        var accountWithSAS = new Microsoft.Azure.Storage.CloudStorageAccount(accountSAS, AzureLuceneConfiguration.AzureStorageAccountName, endpointSuffix: null, useHttps: true);
        var tempLocation = AzureLuceneConfiguration.TempDirectory ?? "temp";
        _logger.LogTrace("tempLocation: {0}", tempLocation);
        azureDirectory = new Lucene.Net.Store.Azure.AzureDirectory(accountWithSAS, tempLocation, containerName: AzureLuceneConfiguration.Container);

        //ensure RAMDirectory
        azureDirectory.CacheDirectory = new RAMDirectory();

        //create an analyzer to process the text
        var analyzer = new StandardAnalyzer(AppLuceneVersion);

        //create an index writer
        var indexConfig = new IndexWriterConfig(AppLuceneVersion, analyzer);
        writer = new IndexWriter(azureDirectory, indexConfig); //used to be dir
        _logger.LogTrace("IndexWriter is initialized");

        if (request.FullRebuild)
        {
            _logger.LogInformation("Full Rebuild is requested. Deleting indices");
            writer.DeleteAll();
            writer.Commit();
            _logger.LogTrace("Full Rebuild is committed.");
        }

        using (var dbConnection = await _SQLservice.GetConnection(cancellationToken))
        {
            SqlCommand cmd;
            if (!request.FullRebuild)
            {
                //we need last sync point only if it is not full rebuild
                var dbCommand = @"SELECT TOP 1 LastSyncPoint FROM [dbo].[FTS_Config]";
                cmd = new SqlCommand(dbCommand, dbConnection);
                try
                {
                    var untyped = await _SQLservice.ExecuteScalarWithRetryAsync(cmd, cancellationToken);
                    var lastSyncPointNullable = untyped as DateTimeOffset?;
                    if (lastSyncPointNullable.HasValue)
                    {
                        lastSyncPoint = lastSyncPointNullable.Value;
                    }
                    _logger.LogDebug("Last sync point is {0}", lastSyncPointNullable.HasValue ? lastSyncPointNullable.Value.ToString() : "'never'");
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "unexpected failure to acquire LastSyncPoint from database");
                    throw;
                }
            }
            else
            {
                lastSyncPoint = DateTimeOffset.MinValue;
            }

            //determine number of records that will need to be processed
            var dbCountCommand = @"SELECT COUNT(Id) from [dbo].[Test_Data] WHERE UpdatedAt >= @lastSyncPoint AND UpdatedAt < @currentSyncPoint AND DeletedAt IS NULL";
            cmd = new SqlCommand(dbCountCommand, dbConnection);
            cmd.Parameters.Add("@lastSyncPoint", System.Data.SqlDbType.DateTimeOffset);
            cmd.Parameters["@lastSyncPoint"].Value = lastSyncPoint;
            cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
            cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;
            try
            {
                var untyped = await _SQLservice.ExecuteScalarWithRetryAsync(cmd, cancellationToken);
                updatedCount = untyped as int?;
                _logger.LogDebug("Expected number of updated documents {0}", updatedCount.HasValue ? updatedCount.Value.ToString() : "'none'");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "unexpected failure to acquire number of documents to be updated from database");
                throw;
            }

            //working on deleted documents
            if (!request.FullRebuild)
            {
                //also need to remove "Deleted" documents. Only if not full rebuild of indices
                var dbDeletedCountCommand = @"SELECT COUNT(Id) from [dbo].[Test_Data] WHERE DeletedAt >= @lastSyncPoint AND DeletedAt <= @currentSyncPoint AND DeletedAt IS NOT NULL";
                cmd = new SqlCommand(dbDeletedCountCommand, dbConnection);
                cmd.Parameters.Add("@lastSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                cmd.Parameters["@lastSyncPoint"].Value = lastSyncPoint;
                cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;
                try
                {
                    var untyped = await _SQLservice.ExecuteScalarWithRetryAsync(cmd, cancellationToken);
                    deletedCount = untyped as int?;
                    _logger.LogDebug("Expected number of deleted documents {0}", deletedCount.HasValue ? deletedCount.Value.ToString() : "'none'");
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "unexpected failure to acquire 'number of documents to be deleted from indices' from database");
                    throw;
                }
            }
        }

        var atLeastOneUpdate = false;
        if (updatedCount.HasValue && updatedCount.Value > 0)
        {
            _logger.LogDebug("Expected number of updated documents: {0}", updatedCount.Value);
            //Start updating 'Updated records'
            await UpdateIndicesWithAddedDocuments(lastSyncPoint, currentSyncPoint, updatedCount.Value, writer, cancellationToken);
            atLeastOneUpdate = true;
        }
        else
        {
            _logger.LogDebug("Expected number of updated documents: none");
        }

        if (deletedCount.HasValue && deletedCount.Value > 0)
        {
            _logger.LogDebug("Expected number of deleted documents: {0}", deletedCount.Value);
            await UpdateIndicesWithDeletedDocuments(lastSyncPoint, currentSyncPoint, deletedCount.Value, writer, cancellationToken);
            atLeastOneUpdate = true;
        }
        else
        {
            _logger.LogDebug("Expected number of deleted documents: none");
        }

        if (atLeastOneUpdate)
        {
            _luceneReaderService.Evict();
            writer.Flush(triggerMerge: true, applyAllDeletes: true);
            _logger.LogInformation("Indexes are updated");
        }

        //update LastSyncPoint
        using (var dbConnection = await _SQLservice.GetConnection(cancellationToken))
        {
            var dbCommand = @"UPDATE [dbo].[FTS_Config] SET LastSyncPoint = @currentSyncPoint";
            var cmd = new SqlCommand(dbCommand, dbConnection);
            cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
            cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;
            try
            {
                await _SQLservice.ExecuteNonQueryWithRetryAsync(cmd, cancellationToken);
                _logger.LogDebug("Last sync point is set to {0}", currentSyncPoint);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "unexpected failure to update LastSyncPoint in database");
                throw;
            }
        }

        var result = new RebuildIndicesResponse
        {
            IsValid = true,
            Success = true,
            NumberOfUpdates = updatedCount,
            NumberOfDeletes = deletedCount,
            CurrentSyncPoint = currentSyncPoint
        };
        return result;
    }
    catch (LockObtainFailedException)
    {
        var result = new RebuildIndicesResponse();
        result.IsValid = false;
        result.Errors = new List<string>();
        result.Errors.Add("Failed to lock the full text search index file. Probably another job is running. Please try again later.");
        return result;
    }
    catch (Exception ex)
    {
        var result = new RebuildIndicesResponse();
        result.IsValid = false;
        result.Errors = new List<string>();
        result.Errors.Add("Unexpected error occurred: " + ex.Message);
        return result;
    }
    finally
    {
        if (writer != null)
        {
            writer.Dispose();
        }
        if (azureDirectory != null)
        {
            azureDirectory.Dispose();
        }
    }
}
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound) { MergePolicy.OneMerge merge = null; while ((merge = writer.NextMerge) != null) { if (VERBOSE) { Console.WriteLine("executing merge " + merge.SegString(writer.Directory)); } writer.Merge(merge); } }
private void ProcessQueue(DataTable q, string indexPath)
{
    rowsProcessed = 0;
    rowsToProcess = q.Rows.Count;

    // first process deletes with reader
    try
    {
        IndexReader reader = IndexReader.Open(indexPath);
        foreach (DataRow row in q.Rows)
        {
            Term term = new Term("Key", row["ItemKey"].ToString());
            try
            {
                reader.DeleteDocuments(term);
                log.Debug("reader.DeleteDocuments(term) for Key " + row["ItemKey"].ToString());
            }
            catch (Exception ge)
            {
                // TODO: monitor what real exceptions if any occur and then
                // change this catch to catch only the expected ones
                // instead of non specific exception
                log.Error(ge);
            }
            bool removeOnly = Convert.ToBoolean(row["RemoveOnly"]);
            if (removeOnly)
            {
                Int64 rowId = Convert.ToInt64(row["RowId"]);
                IndexingQueue.Delete(rowId);
            }
            if (DateTime.UtcNow > nextStatusUpdateTime)
            {
                // don't mark as complete because there may be more qu items
                // for different index paths in a multi site installation
                bool markAsComplete = false;
                ReportStatus(markAsComplete);
            }
        }
        reader.Close();
    }
    catch (IOException ex)
    {
        log.Info("IndexReader swallowed exception; this is not unexpected if building or rebuilding the search index ", ex);
        errorCount += 1;
    }
    catch (TypeInitializationException ex)
    {
        log.Info("IndexReader swallowed exception ", ex);
        errorCount += 1;
    }

    // next add items with writer
    IndexWriter indexWriter = GetWriter(indexPath);
    if (indexWriter == null)
    {
        log.Error("failed to get IndexWriter for path: " + indexPath);
        errorCount += 1;
        return;
    }

    foreach (DataRow row in q.Rows)
    {
        bool removeOnly = Convert.ToBoolean(row["RemoveOnly"]);
        if (!removeOnly)
        {
            try
            {
                IndexItem indexItem = (IndexItem)SerializationHelper.DeserializeFromString(typeof(IndexItem), row["SerializedItem"].ToString());
                Document doc = GetDocument(indexItem);
                WriteToIndex(doc, indexWriter);
                log.Debug("called WriteToIndex(doc, indexWriter) for key " + indexItem.Key);
                Int64 rowId = Convert.ToInt64(row["RowId"]);
                IndexingQueue.Delete(rowId);
            }
            catch (Exception ex)
            {
                log.Error(ex);
            }
        }
        if (DateTime.UtcNow > nextStatusUpdateTime)
        {
            // don't mark as complete because there may be more qu items
            // for different index paths in a multi site installation
            bool markAsComplete = false;
            ReportStatus(markAsComplete);
        }
    }

    try
    {
        indexWriter.Optimize();
    }
    catch (IOException ex)
    {
        log.Error(ex);
    }
    try
    {
        indexWriter.Close();
    }
    catch (IOException ex)
    {
        log.Error(ex);
    }
}
public MyMergeThread(TestMergeSchedulerExternal.MyMergeScheduler outerInstance, IndexWriter writer, MergePolicy.OneMerge merge) : base(outerInstance, writer, merge) { this.OuterInstance = outerInstance; outerInstance.OuterInstance.MergeThreadCreated = true; }
private async Task UpdateIndicesWithAddedDocuments(DateTimeOffset lastSyncPoint, DateTimeOffset currentSyncPoint, int numberOfUpdates, IndexWriter indexWriter, CancellationToken cancellationToken)
{
    using (var dbConnection = await _SQLservice.GetConnection(cancellationToken))
    {
        var startRow = 1;
        var dbCountCommand = @"SELECT Id, ISNULL(Name,''), ISNULL(Content,''), UpdatedAt from [dbo].[Test_Data] WHERE UpdatedAt >= @lastSyncPoint AND UpdatedAt < @currentSyncPoint AND DeletedAt IS NULL ORDER BY Id ASC OFFSET @StartRow - 1 ROWS FETCH NEXT @RowsPerPage ROWS ONLY ";
        while (numberOfUpdates >= startRow)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var cmd = new SqlCommand(dbCountCommand, dbConnection);
            cmd.Parameters.Add("@lastSyncPoint", System.Data.SqlDbType.DateTimeOffset);
            cmd.Parameters["@lastSyncPoint"].Value = lastSyncPoint;
            cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
            cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;
            cmd.Parameters.Add("@StartRow", System.Data.SqlDbType.Int);
            cmd.Parameters["@StartRow"].Value = startRow;
            cmd.Parameters.Add("@RowsPerPage", System.Data.SqlDbType.Int);
            cmd.Parameters["@RowsPerPage"].Value = PageSize;
            try
            {
                using (var reader = await _SQLservice.ExecuteReaderWithRetryAsync(cmd, System.Data.CommandBehavior.SequentialAccess, cancellationToken))
                {
                    while (await reader.ReadAsync())
                    {
                        var document_id = await reader.GetFieldValueAsync<int>(0);
                        var document_name = await reader.GetFieldValueAsync<string>(1);
                        var document_content = await reader.GetFieldValueAsync<string>(2);
                        var document_updatedAt = await reader.GetFieldValueAsync<DateTimeOffset>(3);
                        var updatedAtAsNumber = int.Parse(document_updatedAt.ToString("yyyyMMdd"));
                        var searchDocument = new SearchDocument()
                        {
                            DocumentID = document_id.ToString(),
                            Name = document_name,
                            Content = document_content,
                            UpdatedAt = updatedAtAsNumber
                        };
                        var doc = new Lucene.Net.Documents.Document
                        {
                            // StringField indexes but doesn't tokenize
                            new Lucene.Net.Documents.StringField("doc_id", searchDocument.DocumentID, Lucene.Net.Documents.Field.Store.YES),
                            new Lucene.Net.Documents.StringField("name", searchDocument.Name, Lucene.Net.Documents.Field.Store.YES),
                            new Lucene.Net.Documents.TextField("content", searchDocument.Content, Lucene.Net.Documents.Field.Store.YES),
                            new Lucene.Net.Documents.Int32Field("updated", searchDocument.UpdatedAt, Lucene.Net.Documents.Field.Store.YES)
                        };
                        indexWriter.AddDocument(doc);
                        startRow++;
                    }
                }
                _logger.LogDebug("Processed {0} records (of {1} total) for update", (startRow - 1), numberOfUpdates);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "unexpected failure during indexes update");
                throw;
            }
        }
        _logger.LogInformation("Processed {0} records for update of FTS indices. Completed.", (startRow - 1));
    }
}
public abstract void AddToLuceneIndex(T sampleData, IndexWriter writer);
/// <summary> /// Open the <see cref="DirectoryReader"/> from this <see cref="IndexWriter"/>, without applying deletes (<c>applyAllDeletes: false</c>). /// </summary> protected virtual DirectoryReader OpenIndexReader(IndexWriter writer) { return DirectoryReader.Open(writer, false); }
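For context, a minimal sketch (not from the original source) of consuming the near-real-time reader that OpenIndexReader returns. Because the second argument to DirectoryReader.Open is false, pending deletes are not applied, which is cheaper but means searches may still see deleted documents; the method name CountAllDocs is illustrative.

private static int CountAllDocs(IndexWriter writer)
{
    using (DirectoryReader reader = DirectoryReader.Open(writer, false)) // same call as OpenIndexReader
    {
        var searcher = new IndexSearcher(reader);
        TopDocs hits = searcher.Search(new MatchAllDocsQuery(), 1);
        return hits.TotalHits; // may include docs whose deletes are not yet applied; pass true to apply them
    }
}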
protected internal override void Publish(IndexWriter writer) { Debug.Assert(!m_published, "ticket was already published - cannot publish twice"); m_published = true; FinishFlush(writer, segment, m_frozenUpdates); }
public virtual void TestCRTReopen() { //test behaving badly //should be high enough int maxStaleSecs = 20; //build crap data just to store it. string s = " abcdefghijklmnopqrstuvwxyz "; char[] chars = s.ToCharArray(); StringBuilder builder = new StringBuilder(2048); for (int i = 0; i < 2048; i++) { builder.Append(chars[Random().Next(chars.Length)]); } string content = builder.ToString(); SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); Directory dir = new NRTCachingDirectory(NewFSDirectory(CreateTempDir("nrt")), 5, 128); IndexWriterConfig config = new IndexWriterConfig( #pragma warning disable 612, 618 Version.LUCENE_46, #pragma warning restore 612, 618 new MockAnalyzer(Random())); config.SetIndexDeletionPolicy(sdp); config.SetOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter iw = new IndexWriter(dir, config); SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory()); TrackingIndexWriter tiw = new TrackingIndexWriter(iw); ControlledRealTimeReopenThread <IndexSearcher> controlledRealTimeReopenThread = new ControlledRealTimeReopenThread <IndexSearcher>(tiw, sm, maxStaleSecs, 0); controlledRealTimeReopenThread.SetDaemon(true); controlledRealTimeReopenThread.Start(); IList <ThreadClass> commitThreads = new List <ThreadClass>(); for (int i = 0; i < 500; i++) { if (i > 0 && i % 50 == 0) { ThreadClass commitThread = new RunnableAnonymousInnerClassHelper(this, sdp, dir, iw); commitThread.Start(); commitThreads.Add(commitThread); } Document d = new Document(); d.Add(new TextField("count", i + "", Field.Store.NO)); d.Add(new TextField("content", content, Field.Store.YES)); long start = Environment.TickCount; long l = tiw.AddDocument(d); controlledRealTimeReopenThread.WaitForGeneration(l); long wait = Environment.TickCount - start; assertTrue("waited too long for generation " + wait, wait < (maxStaleSecs * 1000)); IndexSearcher searcher = sm.Acquire(); TopDocs td = searcher.Search(new TermQuery(new Term("count", i + "")), 10); sm.Release(searcher); assertEquals(1, td.TotalHits); } foreach (ThreadClass commitThread in commitThreads) { commitThread.Join(); } controlledRealTimeReopenThread.Dispose(); sm.Dispose(); iw.Dispose(); dir.Dispose(); }
public static UmbracoExamineSearcher GetUmbracoSearcher(IndexWriter writer) { return new UmbracoExamineSearcher(writer, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29)); }
public virtual void Test_Directory() // LUCENENET specific - name collides with property of LuceneTestCase { Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))); var tw = new DirectoryTaxonomyWriter(taxoDir); // first empty commit w.Commit(); tw.Commit(); var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null); FacetsConfig config = new FacetsConfig(); config.SetMultiValued("field", true); AtomicBoolean stop = new AtomicBoolean(); // How many unique facets to index before stopping: //int ordLimit = TestNightly ? 100000 : 6000; // LUCENENET specific: 100000 facets takes about 2-3 hours. To keep it under // the 1 hour free limit of Azure DevOps, this was reduced to 30000. int ordLimit = TestNightly ? 30000 : 6000; var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop); indexer.Start(); try { while (!stop) { SearcherAndTaxonomy pair = mgr.Acquire(); try { //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize()); FacetsCollector sfc = new FacetsCollector(); pair.Searcher.Search(new MatchAllDocsQuery(), sfc); Facets facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc); FacetResult result = facets.GetTopChildren(10, "field"); if (pair.Searcher.IndexReader.NumDocs > 0) { //System.out.println(pair.taxonomyReader.getSize()); Assert.True(result.ChildCount > 0); Assert.True(result.LabelValues.Length > 0); } //if (VERBOSE) { //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0))); //} } finally { mgr.Release(pair); } } } finally { indexer.Join(); } if (Verbose) { Console.WriteLine("TEST: now stop"); } IOUtils.Dispose(mgr, tw, w, taxoDir, indexDir); }
public virtual void TestConcurrency() { AtomicInt32 numDocs = new AtomicInt32(AtLeast(10000)); Directory indexDir = NewDirectory(); Directory taxoDir = NewDirectory(); ConcurrentDictionary <string, string> values = new ConcurrentDictionary <string, string>(); IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)); var tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs)); ThreadJob[] indexThreads = new ThreadJob[AtLeast(4)]; FacetsConfig config = new FacetsConfig(); for (int i = 0; i < 10; i++) { config.SetHierarchical("l1." + i, true); config.SetMultiValued("l1." + i, true); } for (int i = 0; i < indexThreads.Length; i++) { indexThreads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, iw, tw, config); } foreach (ThreadJob t in indexThreads) { t.Start(); } foreach (ThreadJob t in indexThreads) { t.Join(); } var tr = new DirectoryTaxonomyReader(tw); // +1 for root category if (values.Count + 1 != tr.Count) { foreach (string value in values.Keys) { FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value)); if (tr.GetOrdinal(label) == -1) { Console.WriteLine("FAIL: path=" + label + " not recognized"); } } fail("mismatch number of categories"); } int[] parents = tr.ParallelTaxonomyArrays.Parents; foreach (string cat in values.Keys) { FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat)); Assert.True(tr.GetOrdinal(cp) > 0, "category not found " + cp); int level = cp.Length; int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0) FacetLabel path = null; for (int i = 0; i < level; i++) { path = cp.Subpath(i + 1); int ord = tr.GetOrdinal(path); Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path); parentOrd = ord; // next level should have this parent } } IOUtils.Dispose(tw, iw, tr, taxoDir, indexDir); }
/// <summary> /// Initializes a new instance of the <see cref="LuceneIndex" /> class. /// </summary> /// <param name="indexPath">The path to the directory that will contain the Lucene index files.</param> /// <param name="schema">The schema.</param> /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="indexPath" /> is null or whitespace, or when <paramref name="schema" /> is null.</exception> /// <exception cref="System.InvalidOperationException">Thrown when an index already exists at <paramref name="indexPath" /> but a default schema was supplied.</exception> public LuceneIndex(string indexPath, Schema schema) { if (String.IsNullOrWhiteSpace(indexPath)) throw new ArgumentNullException(nameof(indexPath)); if (schema == null) throw new ArgumentNullException(nameof(schema)); IndexPath = indexPath; Schema = schema; if (System.IO.Directory.Exists(IndexPath)) { if (Schema.IsDefault()) throw new InvalidOperationException($"There is an existing index on '{IndexPath}'."); } else { System.IO.Directory.CreateDirectory(IndexPath); } _indexDirectory = new MMapDirectory(Paths.get(IndexPath)); var taxonomyIndexPath = System.IO.Path.Combine(IndexPath, "taxonomy"); if (!System.IO.Directory.Exists(taxonomyIndexPath)) System.IO.Directory.CreateDirectory(taxonomyIndexPath); _taxonomyDirectory = new MMapDirectory(Paths.get(taxonomyIndexPath)); _compositeAnalyzer = new CompositeAnalyzer(Schema); _ramBufferSizeMB = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.RAMBufferSizeMB"] ?? "128"); var config = new IndexWriterConfig(_compositeAnalyzer) .SetOpenMode(IndexWriterConfigOpenMode.CREATE_OR_APPEND) .SetRAMBufferSizeMB(_ramBufferSizeMB) .SetCommitOnClose(true); _indexWriter = new IndexWriter(_indexDirectory, config); _taxonomyWriter = new DirectoryTaxonomyWriter(_taxonomyDirectory, IndexWriterConfigOpenMode.CREATE_OR_APPEND); _searcherTaxonomyManager = new SearcherTaxonomyManager(_indexWriter, true, null, _taxonomyWriter); _facetBuilder = new LuceneFacetBuilder(_taxonomyWriter); _refreshIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexSearcher.RefreshIntervalSeconds"] ?? "0.5"); _commitIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.CommitIntervalSeconds"] ?? "60"); _writeAllowedFlag = new ManualResetEventSlim(true); _refreshTimer = new Timer(o => Refresh(), null, TimeSpan.FromSeconds(_refreshIntervalSeconds), TimeSpan.FromSeconds(_refreshIntervalSeconds)); _commitTimer = new Timer(o => Commit(), null, TimeSpan.FromSeconds(_commitIntervalSeconds), TimeSpan.FromSeconds(_commitIntervalSeconds)); }
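A hypothetical construction sketch for the class above; the schema factory shown is an assumption, not necessarily the real Schema API. After construction, the two timers started at the end of the constructor drive Refresh() and Commit() periodically, so the caller does not need to manage them.

// Assumed: Schema exposes some way to build a default schema; the name CreateDefault is illustrative.
var schema = Schema.CreateDefault();
// Using a fresh path here, since an existing index combined with a default schema throws above.
var index = new LuceneIndex(@"C:\indexes\products", schema);
// From here on, the searcher refreshes every "IndexSearcher.RefreshIntervalSeconds" (default 0.5s)
// and the writer commits every "IndexWriter.CommitIntervalSeconds" (default 60s), per the appSettings read above.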
public ThreadAnonymousInnerClassHelper(TestConcurrentFacetedIndexing outerInstance, AtomicInt32 numDocs, ConcurrentDictionary <string, string> values, IndexWriter iw, Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter tw, FacetsConfig config) { this.outerInstance = outerInstance; this.numDocs = numDocs; this.values = values; this.iw = iw; this.tw = tw; this.config = config; }
public void TestSubclassConcurrentMergeScheduler() { MockDirectoryWrapper dir = NewMockDirectory(); dir.FailOn(new FailOnlyOnMerge()); Document doc = new Document(); Field idField = NewStringField("id", "", Field.Store.YES); doc.Add(idField); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new MyMergeScheduler(this)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy())); LogMergePolicy logMP = (LogMergePolicy)writer.Config.MergePolicy; logMP.MergeFactor = 10; for (int i = 0; i < 20; i++) { writer.AddDocument(doc); } ((MyMergeScheduler)writer.Config.MergeScheduler).Sync(); writer.Dispose(); Assert.IsTrue(MergeThreadCreated); Assert.IsTrue(MergeCalled); Assert.IsTrue(ExcCalled); dir.Dispose(); }
/// <summary> /// Creates a new, empty index at <c>indexDirectory</c> using the <see cref="PanGuAnalyzer"/>; any existing index at that path is overwritten (the <c>true</c> create argument). /// </summary> public void IndexCreate() { analyzer = new PanGuAnalyzer(); IndexWriter writer = new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexDirectory)), analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.Close(); // release the write lock so subsequent writers can open the index }
public void TestCustomMergeScheduler() { // we don't really need to execute anything, just to make sure the custom MS // compiles. But ensure that it can be used as well, e.g., no other hidden // dependencies or something. Therefore, don't use any random API ! Directory dir = new RAMDirectory(); IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null); conf.SetMergeScheduler(new ReportingMergeScheduler()); IndexWriter writer = new IndexWriter(dir, conf); writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.ForceMerge(1); writer.Dispose(); dir.Dispose(); }
protected internal abstract void Publish(IndexWriter writer);
public RunnableAnonymousInnerClassHelper(TestControlledRealTimeReopenThread outerInstance, SnapshotDeletionPolicy sdp, Directory dir, IndexWriter iw) { this.outerInstance = outerInstance; this.sdp = sdp; this.dir = dir; this.iw = iw; }
/** * Split a given index into 3 indexes for training, test and cross validation tasks respectively * * @param originalIndex an {@link AtomicReader} on the source index * @param trainingIndex a {@link Directory} used to write the training index * @param testIndex a {@link Directory} used to write the test index * @param crossValidationIndex a {@link Directory} used to write the cross validation index * @param analyzer {@link Analyzer} used to create the new docs * @param fieldNames names of fields that need to be put in the new indexes or <code>null</code> if all should be used * @throws IOException if any writing operation fails on any of the indexes */ public void Split(AtomicReader originalIndex, Directory trainingIndex, Directory testIndex, Directory crossValidationIndex, Analyzer analyzer, params string[] fieldNames) { // create IWs for train / test / cv IDXs IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer)); IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer)); IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer)); try { int size = originalIndex.MaxDoc; IndexSearcher indexSearcher = new IndexSearcher(originalIndex); TopDocs topDocs = indexSearcher.Search(new MatchAllDocsQuery(), Int32.MaxValue); // set the type to be indexed, stored, with term vectors FieldType ft = new FieldType(TextField.TYPE_STORED); ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = true; ft.StoreTermVectorPositions = true; int b = 0; // iterate over existing documents foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs) { // create a new document for indexing Document doc = new Document(); if (fieldNames != null && fieldNames.Length > 0) { foreach (String fieldName in fieldNames) { doc.Add(new Field(fieldName, originalIndex.Document(scoreDoc.Doc).GetField(fieldName).StringValue, ft)); } } else { foreach (IndexableField storableField in originalIndex.Document(scoreDoc.Doc).Fields) { if (storableField.ReaderValue != null) { doc.Add(new Field(storableField.Name, storableField.ReaderValue, ft)); } else if (storableField.BinaryValue != null) { doc.Add(new Field(storableField.Name, storableField.BinaryValue, ft)); } else if (storableField.StringValue != null) { doc.Add(new Field(storableField.Name, storableField.StringValue, ft)); } else if (storableField.NumericValue != null) { doc.Add(new Field(storableField.Name, storableField.NumericValue.ToString(), ft)); } } } // add it to one of the IDXs if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio) { testWriter.AddDocument(doc); } else if (cvWriter.MaxDoc < size * _crossValidationRatio) { cvWriter.AddDocument(doc); } else { trainingWriter.AddDocument(doc); } b++; } } catch (Exception e) { throw new IOException("Exception in DatasetSplitter", e); } finally { testWriter.Commit(); cvWriter.Commit(); trainingWriter.Commit(); // close IWs testWriter.Dispose(); cvWriter.Dispose(); trainingWriter.Dispose(); } }
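A hedged sketch of driving Split end to end. The two-ratio constructor matches the _testRatio/_crossValidationRatio fields referenced above, but the exact signature should be checked against the DatasetSplitter source; sourceDir, trainDir, testDir, cvDir, and the "body" field name are illustrative. SlowCompositeReaderWrapper is used because Split expects an AtomicReader.

var splitter = new DatasetSplitter(0.1, 0.1); // assumed ctor: (testRatio, crossValidationRatio)
using (DirectoryReader reader = DirectoryReader.Open(sourceDir))
{
    // flatten the composite reader into the single AtomicReader view Split requires
    AtomicReader atomicReader = SlowCompositeReaderWrapper.Wrap(reader);
    // roughly 80%/10%/10% of the docs land in train/test/cv, per the ratio checks above
    splitter.Split(atomicReader, trainDir, testDir, cvDir, new StandardAnalyzer(Util.LuceneVersion.LUCENE_CURRENT), "body");
}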