public virtual void TestAddSameDocTwice()
        {
            // LUCENE-5367: this was a problem with the previous code, making sure it
            // works with the new code.
            Directory indexDir = NewDirectory(), taxoDir = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig facetsConfig = new FacetsConfig();
            Document doc = new Document();
            doc.Add(new FacetField("a", "b"));
            doc = facetsConfig.Build(taxoWriter, doc);
            // these two AddDocument() calls used to fail
            indexWriter.AddDocument(doc);
            indexWriter.AddDocument(doc);
            IOUtils.Close(indexWriter, taxoWriter);

            DirectoryReader indexReader = DirectoryReader.Open(indexDir);
            DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
            IndexSearcher searcher = NewSearcher(indexReader);
            FacetsCollector fc = new FacetsCollector();
            searcher.Search(new MatchAllDocsQuery(), fc);

            Facets facets = GetTaxonomyFacetCounts(taxoReader, facetsConfig, fc);
            FacetResult res = facets.GetTopChildren(10, "a");
            Assert.AreEqual(1, res.LabelValues.Length);
            Assert.AreEqual(2, res.LabelValues[0].Value);
            IOUtils.Close(indexReader, taxoReader);

            IOUtils.Close(indexDir, taxoDir);
        }
Example #2
        public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir)
        {
            int[] freq = new int[nTerms];
            Terms = new Term[nTerms];
            for (int i = 0; i < nTerms; i++)
            {
                int f = (nTerms + 1) - i; // make first terms less frequent
                freq[i] = (int)Math.Ceiling(Math.Pow(f, power));
                Terms[i] = new Term("f", char.ToString((char)('A' + i)));
            }

            IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE));
            for (int i = 0; i < nDocs; i++)
            {
                Document d = new Document();
                for (int j = 0; j < nTerms; j++)
                {
                    if (Random().Next(freq[j]) == 0)
                    {
                        d.Add(NewStringField("f", Terms[j].Text(), Field.Store.NO));
                        //System.out.println(d);
                    }
                }
                iw.AddDocument(d);
            }
            iw.ForceMerge(1);
            iw.Dispose();
        }
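A quick note on the distribution above: term j is added to a document only when Random().Next(freq[j]) == 0, i.e. with probability 1/freq[j], so its expected document frequency is nDocs/freq[j]. A small illustrative helper (the name is hypothetical, not part of the test):
        // Hypothetical helper mirroring the freq[] computation above:
        // expected number of documents that will contain term j.
        public static double ExpectedDocFreq(int nDocs, int nTerms, double power, int j)
        {
            int f = (nTerms + 1) - j;                         // same skew as CreateRandomTerms
            int freq = (int)Math.Ceiling(Math.Pow(f, power)); // one occurrence per freq docs, on average
            return (double)nDocs / freq;
        }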
        /// <summary>
        /// Annotates the given sequence of <see cref="Document"/> objects by adding a <b>_highlight</b> field;
        /// the <b>_highlight</b> field will contain the best matching text fragment from the <see cref="Document"/> 
        /// object's full-text field.
        /// </summary>
        /// <param name="hits">The sequence of <see cref="Document"/> objects.</param>
        /// <param name="criteria">The search criteria that produced the hits.</param>
        /// <returns>
        /// The original sequence of Document objects, with a <b>_highlight</b> field added to each Document.
        /// </returns>
        public static IEnumerable<Document> GenerateHighlights(this IEnumerable<Document> hits, SearchCriteria criteria)
        {
            if (hits == null)
                throw new ArgumentNullException(nameof(hits));
            if (criteria == null)
                throw new ArgumentNullException(nameof(criteria));
            if (String.IsNullOrWhiteSpace(criteria.Query))
                throw new ArgumentException("SearchCriteria.Query cannot be empty");

            var documents = hits.ToList();
            try
            {
                var indexDirectory = new RAMDirectory();
                var analyzer = new FullTextAnalyzer();
                var config = new IndexWriterConfig(analyzer).SetRAMBufferSizeMB(_ramBufferSizeMB);
                var writer = new IndexWriter(indexDirectory, config);

                BuildIndex(documents, writer);
                GenerateHighlights(documents, writer, criteria);

                writer.DeleteAll();
                writer.Commit();
                writer.Dispose();
                indexDirectory.Dispose();
            }
            catch (Exception ex)
            {
                _log.Error(ex);
            }

            return documents;
        }
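A hedged usage sketch for the extension method above (the SearchCriteria shape and the source of the hit list are assumed from the surrounding application):
            // Illustrative call site: highlight hits from an earlier search,
            // then read back the _highlight field added by GenerateHighlights.
            var criteria = new SearchCriteria { Query = "full text search" };
            IEnumerable<Document> highlighted = searchResults.GenerateHighlights(criteria);
            foreach (var doc in highlighted)
                Console.WriteLine(doc.Get("_highlight"));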
Example #4
        public virtual void TestCustomLockFactory()
        {
            Directory dir = new MockDirectoryWrapper(Random(), new RAMDirectory());
            MockLockFactory lf = new MockLockFactory(this);
            dir.LockFactory = lf;

            // Lock prefix should have been set:
            Assert.IsTrue(lf.LockPrefixSet, "lock prefix was not set by the RAMDirectory");

            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            // add 100 documents (so that commit lock is used)
            for (int i = 0; i < 100; i++)
            {
                AddDoc(writer);
            }

            // Both write lock and commit lock should have been created:
            Assert.AreEqual(1, lf.LocksCreated.Count, "# of unique locks created (after instantiating IndexWriter)");
            Assert.IsTrue(lf.MakeLockCount >= 1, "# calls to makeLock is 0 (after instantiating IndexWriter)");

            foreach (String lockName in lf.LocksCreated.Keys)
            {
                MockLockFactory.MockLock @lock = (MockLockFactory.MockLock)lf.LocksCreated[lockName];
                Assert.IsTrue(@lock.LockAttempts > 0, "# calls to Lock.obtain is 0 (after instantiating IndexWriter)");
            }

            writer.Dispose();
        }
Example #5
	  public static void Main(string[] args)
	  {
		if (args.Length < 3)
		{
		  Console.Error.WriteLine("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
		  Environment.Exit(1);
		}
		FSDirectory mergedIndex = FSDirectory.Open(new DirectoryInfo(args[0]));

		IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(Version.LUCENE_CURRENT, null)
		   .SetOpenMode(OpenMode.CREATE));

		Directory[] indexes = new Directory[args.Length - 1];
		for (int i = 1; i < args.Length; i++)
		{
		  indexes[i - 1] = FSDirectory.Open(new DirectoryInfo(args[i]));
		}

		Console.WriteLine("Merging...");
		writer.AddIndexes(indexes);

		Console.WriteLine("Full merge...");
		writer.ForceMerge(1);
		writer.Dispose();
		Console.WriteLine("Done.");
	  }
Example #6
        public virtual void TestMmapIndex()
        {
            // sometimes the directory is not cleaned by rmDir, because on Windows it
            // may take some time until the files are finally dereferenced. So clean the
            // directory up front, or otherwise new IndexWriter will fail.
            DirectoryInfo dirPath = CreateTempDir("testLuceneMmap");
            RmDir(dirPath);
            MMapDirectory dir = new MMapDirectory(dirPath, null);

            // plan to add a set of useful stopwords, consider changing some of the
            // interior filters.
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            // TODO: something about lock timeouts and leftover locks.
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));
            writer.Commit();
            IndexReader reader = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            int num = AtLeast(1000);
            for (int dx = 0; dx < num; dx++)
            {
                string f = RandomField();
                Document doc = new Document();
                doc.Add(NewTextField("data", f, Field.Store.YES));
                writer.AddDocument(doc);
            }

            reader.Dispose();
            writer.Dispose();
            RmDir(dirPath);
        }
		public LogDocMergePolicy(IndexWriter writer):base(writer)
		{
			minMergeSize = DEFAULT_MIN_MERGE_DOCS;
			
			// maxMergeSize is never used by LogDocMergePolicy; set
			// it to Int64.MaxValue to disable it
			maxMergeSize = System.Int64.MaxValue;
		}
 protected override MergeThread GetMergeThread(IndexWriter writer, MergePolicy.OneMerge merge)
 {
     MergeThread thread = new MyMergeThread(this, writer, merge);
     thread.ThreadPriority = MergeThreadPriority;
     thread.SetDaemon(true);
     thread.Name = "MyMergeThread";
     return thread;
 }
		public LogByteSizeMergePolicy(IndexWriter writer)
            : base(writer)
		{
			minMergeSize = (long) (DEFAULT_MIN_MERGE_MB * 1024 * 1024);
            //mgarski - the line below causes an overflow in .NET, resulting in a negative number...
			//maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB * 1024 * 1024);
            maxMergeSize = DEFAULT_MAX_MERGE_MB;
		}
 private void AddDocs2(IndexWriter writer, int numDocs)
 {
     for (int i = 0; i < numDocs; i++)
     {
         Document doc = new Document();
         doc.Add(NewTextField("content", "bbb", Field.Store.NO));
         writer.AddDocument(doc);
     }
 }
 public IndexerThread(IndexWriter w, FacetsConfig config, TaxonomyWriter tw, ReferenceManager<SearcherAndTaxonomy> mgr, int ordLimit, AtomicBoolean stop)
 {
     this.w = w;
     this.config = config;
     this.tw = tw;
     this.mgr = mgr;
     this.ordLimit = ordLimit;
     this.stop = stop;
 }
        private IndexWriter NewWriter(Directory dir, IndexWriterConfig conf)
        {
            LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
            logDocMergePolicy.NoCFSRatio = 0.0; // make sure we use plain files
            conf.SetMergePolicy(logDocMergePolicy);

            IndexWriter writer = new IndexWriter(dir, conf);
            return writer;
        }
 private void AddDocs3(IndexWriter writer, int numDocs)
 {
     for (int i = 0; i < numDocs; i++)
     {
         Document doc = new Document();
         doc.Add(NewTextField("content", "ccc", Field.Store.NO));
         doc.Add(NewStringField("id", "" + i, Field.Store.YES));
         writer.AddDocument(doc);
     }
 }
Example #14
 // TODO: this should be setUp()....
 public virtual void CreateDummySearcher()
 {
     // Create a dummy index with nothing in it.
     // this could possibly fail if Lucene starts checking for docid ranges...
     d = NewDirectory();
     IndexWriter iw = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
     iw.AddDocument(new Document());
     iw.Dispose();
     r = DirectoryReader.Open(d);
     s = NewSearcher(r);
 }
 public override void TearDown()
 {
     Iw.Dispose();
     TestUtil.CheckIndex(Dir); // for some extra coverage, checkIndex before we forceMerge
     Iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
     IndexWriter iw = new IndexWriter(Dir, (IndexWriterConfig)Iwc.Clone());
     iw.ForceMerge(1);
     iw.Dispose();
     Dir.Dispose(); // just force a checkindex for now
     base.TearDown();
 }
        public void BeforeClassSorterUtilTest()
        {
            // only read the values of the undeleted documents, since after addIndexes,
            // the deleted ones will be dropped from the index.
            Bits liveDocs = reader.LiveDocs;
            List<int> values = new List<int>();
            for (int i = 0; i < reader.MaxDoc; i++)
            {
                if (liveDocs == null || liveDocs.Get(i))
                {
                    values.Add(int.Parse(reader.Document(i).Get(ID_FIELD), CultureInfo.InvariantCulture));
                }
            }
            int idx = Random().Next(SORT.Length);
            Sort sorter = SORT[idx];
            if (idx == 1)
            { // reverse doc sort
                values.Reverse();
            }
            else
            {
                values.Sort();
                if (Random().NextBoolean())
                {
                    sorter = new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type_e.LONG, true)); // descending
                    values.Reverse();
                }
            }
            sortedValues = values.ToArray();
            if (VERBOSE)
            {
                Console.WriteLine("sortedValues: " + sortedValues);
                Console.WriteLine("Sorter: " + sorter);
            }

            Directory target = NewDirectory();
            using (IndexWriter writer = new IndexWriter(target, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)))
            {
                using (reader = SortingAtomicReader.Wrap(reader, sorter))
                {
                    writer.AddIndexes(reader);
                }
            }
            dir.Dispose();

            // CheckIndex the target directory
            dir = target;
            TestUtil.CheckIndex(dir);

            // set reader for tests
            reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            assertFalse("index should not have deletions", reader.HasDeletions);
        }
		/// <summary>Just do the merges in sequence. We do this
		/// "synchronized" so that even if the application is using
		/// multiple threads, only one merge may run at a time. 
		/// </summary>
		public override void  Merge(IndexWriter writer)
		{
			lock (this)
			{
				while (true)
				{
					MergePolicy.OneMerge merge = writer.GetNextMerge();
					if (merge == null)
						break;
					writer.Merge(merge);
				}
			}
		}
 /// <summary>
 /// Just do the merges in sequence. We do this
 /// "synchronized" so that even if the application is using
 /// multiple threads, only one merge may run at a time.
 /// </summary>
 public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
 {
     lock (this)
     {
         while (true)
         {
             MergePolicy.OneMerge merge = writer.NextMerge;
             if (merge == null)
             {
                 break;
             }
             writer.Merge(merge);
         }
     }
 }
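For context, Lucene ships this "one merge at a time" behavior as SerialMergeScheduler; a minimal sketch of installing it on a writer, assuming the Lucene.NET 4.8-style config API:
 // Minimal sketch: run all merges synchronously on the indexing thread.
 var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48));
 config.SetMergeScheduler(new SerialMergeScheduler());
 using (var writer = new IndexWriter(new RAMDirectory(), config))
 {
     // AddDocument/ForceMerge calls here will merge without background threads
 }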
 public virtual void TestFailIfIndexWriterNotClosed()
 {
     MockDirectoryWrapper dir = NewMockDirectory();
     IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
     try
     {
         dir.Dispose();
         Assert.Fail();
     }
     catch (Exception expected)
     {
         Assert.IsTrue(expected.Message.Contains("there are still open locks"));
     }
     iw.Dispose();
     dir.Dispose();
 }
        public virtual void TestBasic()
        {
            HashSet<string> fileExtensions = new HashSet<string>();
            fileExtensions.Add(Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
            fileExtensions.Add(Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);

            MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(Random(), new RAMDirectory());
            primaryDir.CheckIndexOnClose = false; // only part of an index
            MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(Random(), new RAMDirectory());
            secondaryDir.CheckIndexOnClose = false; // only part of an index

            FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true);
            // for now we wire Lucene40Codec because we rely upon its specific impl
            bool oldValue = OLD_FORMAT_IMPERSONATION_IS_ACTIVE;
            OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
            IndexWriter writer = new IndexWriter(fsd, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NewLogMergePolicy(false)).SetCodec(Codec.ForName("Lucene40")).SetUseCompoundFile(false));
            TestIndexWriterReader.CreateIndexNoClose(true, "ram", writer);
            IndexReader reader = DirectoryReader.Open(writer, true);
            Assert.AreEqual(100, reader.MaxDoc);
            writer.Commit();
            // we should see only fdx,fdt files here
            string[] files = primaryDir.ListAll();
            Assert.IsTrue(files.Length > 0);
            for (int x = 0; x < files.Length; x++)
            {
                string ext = FileSwitchDirectory.GetExtension(files[x]);
                Assert.IsTrue(fileExtensions.Contains(ext));
            }
            files = secondaryDir.ListAll();
            Assert.IsTrue(files.Length > 0);
            // we should not see fdx,fdt files here
            for (int x = 0; x < files.Length; x++)
            {
                string ext = FileSwitchDirectory.GetExtension(files[x]);
                Assert.IsFalse(fileExtensions.Contains(ext));
            }
            reader.Dispose();
            writer.Dispose();

            files = fsd.ListAll();
            for (int i = 0; i < files.Length; i++)
            {
                Assert.IsNotNull(files[i]);
            }
            fsd.Dispose();
            OLD_FORMAT_IMPERSONATION_IS_ACTIVE = oldValue;
        }
 public override void SetUp()
 {
     base.SetUp();
     Directory = NewDirectory();
     IndexWriter writer = new IndexWriter(Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
     //writer.setNoCFSRatio(0.0);
     //writer.infoStream = System.out;
     FieldType customType = new FieldType(TextField.TYPE_STORED);
     customType.Tokenized = false;
     customType.StoreTermVectors = true;
     for (int i = 0; i < NumDocs; i++)
     {
         Documents.Document doc = new Documents.Document();
         Field fld = NewField("field", English.IntToEnglish(i), customType);
         doc.Add(fld);
         writer.AddDocument(doc);
     }
     writer.Dispose();
 }
        public ThumbnailIndexer(IndexPreferences indexPreferences)
        {
            this.indexPreferences = indexPreferences;

            if (!System.IO.Directory.Exists(Preferences.Instance.ThumbIndexFolder))
            {
                logger.Info("Creating thumbs folder: '{0}'", Preferences.Instance.ThumbIndexFolder);
                System.IO.Directory.CreateDirectory(Preferences.Instance.ThumbIndexFolder);
            }

            var config = new IndexWriterConfig(FindAPhotoAnalyzers.IndexVersion, FindAPhotoAnalyzers.ForIndexing());
            writer = new IndexWriter(
                FSDirectory.open(new java.io.File(Preferences.Instance.ThumbIndexFolder)),
                config);
            writer.commit();

            searchManager = new SearchManager(writer.getDirectory());

            tasks.Add(Task.Run(DequeueFiles));
            tasks.Add(Task.Run(DequeueFiles));
            tasks.Add(Task.Run(CommitTask));
        }
        public override void SetUp()
        {
            base.SetUp();
            //IndexDir = CreateTempDir("RAMDirIndex");
            string tempDir = Path.GetTempPath();
            if (tempDir == null)
                throw new IOException("java.io.tmpdir undefined, cannot run test");
            IndexDir = new DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex"));

            Directory dir = NewFSDirectory(IndexDir);
            IndexWriter writer = new IndexWriter(dir, (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));
            // add some documents
            Document doc = null;
            for (int i = 0; i < DocsToAdd; i++)
            {
                doc = new Document();
                doc.Add(NewStringField("content", English.IntToEnglish(i).Trim(), Field.Store.YES));
                writer.AddDocument(doc);
            }
            Assert.AreEqual(DocsToAdd, writer.MaxDoc);
            writer.Dispose();
            dir.Dispose();
        }
        public DocumentIndexer()
        {
            if (!System.IO.Directory.Exists(Preferences.Instance.MainIndexFolder))
            {
                logger.Info("Creating main index folder: '{0}'", Preferences.Instance.MainIndexFolder);
                System.IO.Directory.CreateDirectory(Preferences.Instance.MainIndexFolder);
            }
            else
            {
                logger.Info("Updating index at '{0}'", Preferences.Instance.MainIndexFolder);
            }

            var config = new IndexWriterConfig(FindAPhotoAnalyzers.IndexVersion, FindAPhotoAnalyzers.ForIndexing());
            mainWriter = new IndexWriter(
                FSDirectory.open(new java.io.File(Preferences.Instance.MainIndexFolder)),
                config);
            taxonomyWriter = new DirectoryTaxonomyWriter(
                FSDirectory.open(new java.io.File(Preferences.Instance.FacetIndexFolder)),
                IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

            indexPreferences = new IndexPreferences(mainWriter);
            thumbnailIndexer = new ThumbnailIndexer(indexPreferences);
            startTime = DateTime.Now;
        }
Example #25
 public void SetUp()
 {
     dir    = new RAMDirectory();
     writer = new IndexWriter(dir, new IndexWriterConfig(Util.LuceneVersion.LUCENE_48, new StandardAnalyzer(Util.LuceneVersion.LUCENE_48)));
 }
Example #26
        private void button2_Click(object sender, EventArgs e) //show the abstract for DBpedia
        {
            if (textBox2.Text.Equals(""))
            {
                MessageBox.Show("Zadajte cestu k suboru!");
            }
            else
            {
                if (textBox3.Text.Equals(""))
                {
                    MessageBox.Show("Vyplňte políčko: Zadajte hľadaný abstrakt!");
                }
                else
                {
                    //if the index has not been built from my file yet
                    if (!MamIndexDBPedia)
                    {
                        directoryDBPedia = FSDirectory.Open(new DirectoryInfo(Environment.CurrentDirectory + "\\DBPediaIndex"));
                        analyzerDBPedia  = new StandardAnalyzer(Version.LUCENE_29);
                        IndexWriter writer = new IndexWriter(directoryDBPedia, analyzerDBPedia, true, IndexWriter.MaxFieldLength.UNLIMITED);

                        string riadok;
                        cestaSuborDBPedia = textBox2.Text;

                        System.IO.StreamReader subor = new System.IO.StreamReader(cestaSuborDBPedia);

                        while ((riadok = subor.ReadLine()) != null)
                        {
                            string[] poleAbstrakt = riadok.Split(new string[] { ";|?" }, StringSplitOptions.None);

                            //index each line, previously split into title and abstract
                            Document doc = new Document();

                            doc.Add(new Field("Nadpis", poleAbstrakt[0], Field.Store.YES, Field.Index.ANALYZED));
                            doc.Add(new Field("Abstrakt", poleAbstrakt[1], Field.Store.YES, Field.Index.NO));

                            writer.AddDocument(doc);
                        }
                        //close and update the writer and the rest
                        MamIndexDBPedia = true;
                        writer.Optimize();
                        //writer.Commit();
                        writer.Close();
                        MessageBox.Show("Indexovanie súboru prebehlo úspešne!");
                    }

                    if (MamIndexDBPedia)
                    {
                        //open the directory for reading the index and define the searcher
                        IndexReader indexReader = IndexReader.Open(directoryDBPedia, true);
                        Searcher    indexSearch = new IndexSearcher(indexReader);

                        //create the query parser itself, set the version and the field to search, and pass in the query from the input
                        QueryParser queryParser = new QueryParser(Version.LUCENE_29, "Nadpis", analyzerDBPedia);
                        Query       query       = queryParser.Parse(textBox3.Text);

                        //run the search itself and fill the results structure
                        TopDocs resultDocs = indexSearch.Search(query, 20);

                        //the search hits that were found
                        var hits = resultDocs.scoreDocs;

                        richTextBox2.Text = "";
                        abstraktDBpedia   = "";

                        textBox5.Text = resultDocs.totalHits.ToString();

                        if (resultDocs.totalHits > 0)
                        {
                            foreach (var hit in hits)
                            {
                                var documentFromSearcher = indexSearch.Doc(hit.doc);

                                richTextBox2.Text += documentFromSearcher.Get("Nadpis") + ": " + documentFromSearcher.Get("Abstrakt") + "\n";

                                abstraktDBpedia += documentFromSearcher.Get("Abstrakt") + " ";
                            }
                        }
                        else
                        {
                            richTextBox2.Text = "Nenasla sa ziadna zhoda";
                        }
                    }
                }
            }
        }
Example #27
        public void IndexSelectedReviews(ISet <string> reviewIds)
        {
            StandardAnalyzer analyzer = null;
            IndexWriter      writer   = null;

            try
            {
                analyzer = new StandardAnalyzer(Version.LUCENE_30);
                writer   = new IndexWriter(_dirLocation, analyzer,
                                           IndexWriter.MaxFieldLength.UNLIMITED);

                var tableManager = new TableManager();

                var reviewList = tableManager.GetReviewsById(GenerateListFromSet(reviewIds));

                foreach (var id in reviewIds)
                {
                    if (reviewList.ContainsKey(id))
                    {
                        Trace.TraceInformation("Adding {0} to the index", id);

                        var reviewEntity = reviewList[id];

                        // delete entry if exists
                        var searchQuery = new TermQuery(new Term(Constants.Constants.Field_Id, id));
                        writer.DeleteDocuments(searchQuery);

                        // add to index again
                        var doc = new Document();
                        doc.Add(new Field(Constants.Constants.Field_Id, reviewEntity.ReviewId, Field.Store.YES,
                                          Field.Index.NOT_ANALYZED));

                        doc.Add(new Field(Constants.Constants.Field_EntityType, Constants.Constants.Field_EntityType_Reviews, Field.Store.YES,
                                          Field.Index.NOT_ANALYZED));
                        doc.Add(new Field(Constants.Constants.Field_ReviewerName, reviewEntity.ReviewerName, Field.Store.YES,
                                          Field.Index.ANALYZED));
                        doc.Add(new Field(Constants.Constants.Field_EntityType_ReviewText, reviewEntity.Review, Field.Store.YES, Field.Index.ANALYZED));

                        writer.AddDocument(doc);
                    }
                    else
                    {
                        Trace.TraceWarning("movie {0} not present in db", id);
                    }
                }
            }
            catch (Exception err)
            {
                Trace.TraceError("Failed to build index {0}", err);
            }
            finally
            {
                if (analyzer != null)
                {
                    analyzer.Close();
                }
                if (writer != null)
                {
                    writer.Dispose();
                }
            }
        }
Example #28
 private void CleanWriter()
 {
     indexWriter?.Dispose();
     indexWriter = null;
 }
Example #29
        public void Write()
        {
            //var indexDir = new System.IO.DirectoryInfo(System.IO.Directory.GetCurrentDirectory() + "/abc");
            //var returnIndexDir = FSDirectory.Open(indexDir);
            ramDir = new RAMDirectory();

            //var c = new CharArraySet(LuceneVersion.LUCENE_48, 1, true);
            //c.Add(' ');
            //var iwc = new IndexWriterConfig(LuceneVersion.LUCENE_48,
            //        new StandardAnalyzer(LuceneVersion.LUCENE_48, c));
            var indexWriteCfg = new IndexWriterConfig(LuceneVersion.LUCENE_48,
                                                      new ClassicAnalyzer(LuceneVersion.LUCENE_48));
            var writer = new IndexWriter(ramDir, indexWriteCfg);

            using (var con = new SqlConnection("server=.;database=testdb;uid=sa;pwd=1;"))
            {
                var list = con.Query <dynamic>("SELECT FMEFeeItemID, FName,FPy FROM t_bx_feeitem");
                foreach (var item in list)
                {
                    var doc   = new Document();
                    var fname = item.FName == null ? "" : item.FName.ToString();
                    doc.Add(new Field("fname", fname, new FieldType()
                    {
                        IsIndexed = true, IsStored = true
                    }));

                    string fpy   = item.FPy == null ? "" : item.FPy?.ToString();
                    var    newpy = new StringBuilder();
                    for (int i = 0; i < fpy.Length; i++)
                    {
                        if (_cxDic.ContainsKey(fpy[i]))
                        {
                            newpy.Append(_cxDic[fpy[i]]);
                        }
                        else
                        {
                            newpy.Append(fpy[i]);
                        }
                    }
                    doc.Add(new Field("fpy", newpy.ToString(), new FieldType()
                    {
                        IsIndexed = true, IsStored = false,
                    }));

                    doc.Add(new Field("py", fpy, new FieldType()
                    {
                        IsIndexed = false, IsStored = true,
                    }));

                    var fmefeeitemid = item.FMEFeeItemID == null ? "" : item.FMEFeeItemID?.ToString();
                    doc.Add(new Field("fmefeeitemid", fmefeeitemid, new FieldType()
                    {
                        IsIndexed = false, IsStored = true
                    }));

                    writer.AddDocument(doc);
                }
                writer.Flush(true, true);
                writer.Commit();
            }
        }
Example #30
        public virtual void TestNrt()
        {
            Store.Directory   dir     = NewDirectory();
            Store.Directory   taxoDir = NewDirectory();
            IndexWriterConfig iwc     = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            // Don't allow tiny maxBufferedDocs; it can make this
            // test too slow:
            iwc.SetMaxBufferedDocs(Math.Max(500, iwc.MaxBufferedDocs));

            // MockRandom/AlcoholicMergePolicy are too slow:
            TieredMergePolicy tmp = new TieredMergePolicy();

            tmp.FloorSegmentMB = .001;
            iwc.SetMergePolicy(tmp);
            IndexWriter  w      = new IndexWriter(dir, iwc);
            var          tw     = new DirectoryTaxonomyWriter(taxoDir);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

            var mgr = new SearcherTaxonomyManager(w, true, null, tw);

            var reopener = new ThreadAnonymousInnerClassHelper(this, stop, mgr);

            reopener.Name = "reopener";
            reopener.Start();

            indexer.Name = "indexer";
            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
                reopener.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Dispose(mgr, tw, w, taxoDir, dir);
        }
Example #31
        private async Task UpdateIndicesWithDeletedDocuments(DateTimeOffset lastSyncPoint, DateTimeOffset currentSyncPoint, int numberOfDeletes, IndexWriter indexWriter, CancellationToken cancellationToken)
        {
            using (var dbConnection = await _SQLservice.GetConnection(cancellationToken))
            {
                var startRow = 1;

                var dbCountCommand = @"SELECT Id from [dbo].[Test_Data] 
                        WHERE DeletedAt >= @lastSyncPoint AND DeletedAt < @currentSyncPoint AND DeletedAt IS NOT NULL
                        ORDER BY Id ASC OFFSET @StartRow - 1 ROWS FETCH NEXT @RowsPerPage ROWS ONLY ";


                while (numberOfDeletes >= startRow)
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    var cmd = new SqlCommand(dbCountCommand, dbConnection);
                    cmd.Parameters.Add("@lastSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                    cmd.Parameters["@lastSyncPoint"].Value = lastSyncPoint;
                    cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                    cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;
                    cmd.Parameters.Add("@StartRow", System.Data.SqlDbType.Int);
                    cmd.Parameters["@StartRow"].Value = startRow;
                    cmd.Parameters.Add("@RowsPerPage", System.Data.SqlDbType.Int);
                    cmd.Parameters["@RowsPerPage"].Value = PageSize;

                    try
                    {
                        using (var reader = await _SQLservice.ExecuteReaderWithRetryAsync(cmd, System.Data.CommandBehavior.SequentialAccess, cancellationToken))
                        {
                            while (await reader.ReadAsync())
                            {
                                var document_id = await reader.GetFieldValueAsync <int>(0);

                                indexWriter.DeleteDocuments(new Term("doc_id", document_id.ToString()));
                                startRow++;
                            }
                        }


                        _logger.LogDebug("Processed {0} records (of {1} total) for delete", (startRow - 1), numberOfDeletes);
                    }
                    catch (Exception ex)
                    {
                        _logger.LogError(ex, "unexpected failure during index delete");
                        throw;
                    }
                }

                _logger.LogInformation("Processed {0} records for delete of FTS indices. Completed.", (startRow - 1));
            }
        }
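A hedged sketch of how a caller might drive the method above between two sync points (the helper names and sync-point bookkeeping are hypothetical):
            // Illustrative caller: remove rows deleted in SQL since the last
            // sync, then commit so the deletes become visible to searchers.
            var lastSyncPoint = await GetLastSyncPointAsync(cancellationToken);   // hypothetical helper
            var currentSyncPoint = DateTimeOffset.UtcNow;
            var numberOfDeletes = await CountDeletedRowsAsync(lastSyncPoint, currentSyncPoint, cancellationToken); // hypothetical helper
            await UpdateIndicesWithDeletedDocuments(lastSyncPoint, currentSyncPoint, numberOfDeletes, indexWriter, cancellationToken);
            indexWriter.Commit();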
Example #32
        public Grouping()
        {
            #region Init

            Directory directory = new RAMDirectory();

            var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);

            var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);

            IndexWriter indexWriter = new IndexWriter(directory, config);

            #endregion

            #region Add Docs to Index

            #region Setup Group End Field

            FieldType groupEndFieldType = new FieldType();

            groupEndFieldType.IsStored = false;

            groupEndFieldType.IsTokenized = false;

            groupEndFieldType.IsIndexed = true;

            groupEndFieldType.IndexOptions = IndexOptions.DOCS_ONLY;

            groupEndFieldType.OmitNorms = true;

            Field groupEndField = new Field("groupEnd", "x", groupEndFieldType);

            #endregion

            List <Document> documentList = new List <Document>();

            Document doc = new Document();

            doc.Add(new StringField("BookId", "B1", Field.Store.YES));

            doc.Add(new StringField("Category", "Cat 1", Field.Store.YES));

            doc.Add(new Int32Field("Repetition", 1, Field.Store.YES));

            documentList.Add(doc);

            doc = new Document();

            doc.Add(new StringField("BookId", "B2", Field.Store.YES));

            doc.Add(new StringField("Category", "Cat 1", Field.Store.YES));

            doc.Add(new Int32Field("Repetition", 1, Field.Store.YES));

            documentList.Add(doc);

            doc.Add(groupEndField);

            indexWriter.AddDocuments(documentList);

            documentList = new List <Document>();

            doc = new Document();

            doc.Add(new StringField("BookId", "B3", Field.Store.YES));

            doc.Add(new StringField("Category", "Cat 2", Field.Store.YES));

            doc.Add(new Int32Field("Repetition", 2, Field.Store.YES));

            documentList.Add(doc);

            doc.Add(groupEndField);

            indexWriter.AddDocuments(documentList);

            indexWriter.Dispose();

            #endregion

            //BasicFindRepByNumericRange(directory);

            //LookupGroupsByIntAlt(directory);

            TwoPassGroupingSearch(directory);

            directory.Dispose();
        }
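TwoPassGroupingSearch is not shown above; a hypothetical sketch of what it could do with the GroupingSearch convenience class from Lucene.Net.Grouping (which runs the two grouping passes internally), grouping on the Category field indexed above:
        // Hypothetical body for TwoPassGroupingSearch: group all docs by
        // "Category" and print each group with its hit count.
        private static void TwoPassGroupingSearch(Directory directory)
        {
            using (var reader = DirectoryReader.Open(directory))
            {
                var searcher = new IndexSearcher(reader);
                var groupingSearch = new GroupingSearch("Category");
                groupingSearch.SetGroupDocsLimit(10);
                var topGroups = groupingSearch.Search(searcher, new MatchAllDocsQuery(), 0, 10);
                foreach (var group in topGroups.Groups)
                    Console.WriteLine("{0}: {1} hits", group.GroupValue, group.TotalHits);
            }
        }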
Example #33
        public virtual void TestDirectory()
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();
            IndexWriter     w        = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);

            // first empty commit
            w.Commit();
            tw.Commit();
            var          mgr    = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

            var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);

            indexer.Start();

            try
            {
                while (!stop.Get())
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Dispose(mgr, tw, w, taxoDir, indexDir);
        }
        private static void IndexDocs2(IndexWriter writer, string path)
        {
            if (SystemIO.Directory.Exists(path))
            {
                TBFileIndexing.AppendText(Environment.NewLine + path);
                SystemIO.DirectoryInfo dirInfo = new SystemIO.DirectoryInfo(path);

                string folderExclude = System.Configuration.ConfigurationManager.AppSettings["FolderExclude"].ToLower();
                if (folderExclude.Contains(dirInfo.Name.ToLower()))
                {
                    return;
                }

                foreach (SystemIO.FileInfo file in dirInfo.GetFiles())
                {
                    IndexDocs(writer, file.FullName);
                }
                foreach (SystemIO.DirectoryInfo dir in dirInfo.GetDirectories())
                {
                    IndexDocs(writer, dir.FullName);
                }
            }
            else
            {
                bool ignoreIndex = false;
                //TBFileIndexing.AppendText(Environment.NewLine + path);
                if (TBFileIndexing.Text.Length > 5000)
                {
                    TBFileIndexing.Text = "";
                }
                string fileExt = SystemIO.Path.GetExtension(path);
                if (fileExt != null)
                {
                    fileExt = fileExt.ToLower();
                }

                string extInclude = System.Configuration.ConfigurationManager.AppSettings["ExtInclude"];
                if (!string.IsNullOrEmpty(extInclude))
                {
                    if (!extInclude.Contains(fileExt))
                    {
                        TBFileIndexing.AppendText("... not include file extension - ignored");
                        ignoreIndex = true;
                    }
                }
                else
                {
                    string extExclude = System.Configuration.ConfigurationManager.AppSettings["ExtExclude"];

                    if (extExclude.Contains(fileExt))
                    {
                        TBFileIndexing.AppendText("... exclude file extension - ignored");
                        ignoreIndex = true;
                    }
                }
                SystemIO.FileInfo fileInfo = new System.IO.FileInfo(path);
                if (fileInfo.Length > 5000000)//~5MB
                {
                    TBFileIndexing.AppendText("... big file - ignored");
                    ignoreIndex = true;
                }

                try
                {
                    Document doc       = new Document();
                    Field    pathField = new Field("path", path.Replace(rootPath + "\\", ""), Field.Store.YES, Field.Index.NO);
                    doc.Add(pathField);
                    //doc.Add(new Field("modified",
                    //    new SystemIO.FileInfo(path).LastWriteTime.ToString("yyyyMMddHHmmss"), Field.Store.YES, Field.Index.NO));
                    string textContent = string.Empty;
                    if (!ignoreIndex) //only read not ignore files
                    {
                        textContent = ReaderFactory.GetText(path);
                    }

                    textContent = SystemIO.Path.GetFileNameWithoutExtension(path) + Environment.NewLine + textContent;
                    textContent = PrepareForIndex(textContent, fileExt);
                    Field contentField = new Field("content", textContent, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
                    doc.Add(contentField);
                    writer.AddDocument(doc);
                }
                catch (Exception ex)
                {
                    TBFileIndexing.AppendText("...read failed: " + ex.Message);
                }
            }
        }
Example #35
        /// <summary>
        ///     Assigns the additional parameters.
        /// </summary>
        /// <param name = "settings"></param>
        public override void AssignSettings(Dictionary <string, string> settings)
        {
            _checkIndexes = bool.Parse(settings["CheckIndexes"]);

            bool autoCommit = bool.Parse(settings["AutoCommit"]);

            string luceneDotNetIndexDirectory = settings["LuceneDotNetIndexDirectory"];
            string currentCrawlDirectory      = Path.Combine(luceneDotNetIndexDirectory, "CurrentCrawl");

            //create required directories...
            if (!Directory.Exists(luceneDotNetIndexDirectory))
            {
                Directory.CreateDirectory(luceneDotNetIndexDirectory);
            }

            if (!Directory.Exists(currentCrawlDirectory))
            {
                Directory.CreateDirectory(currentCrawlDirectory);
            }

            //create lucene.net directories...
            _luceneDotNetIndexDirectory = FSDirectory.Open(new DirectoryInfo(luceneDotNetIndexDirectory));
            _currentCrawlDirectory      = FSDirectory.Open(new DirectoryInfo(currentCrawlDirectory));

            _standardAnalyzer = new StandardAnalyzer();

            //delete the lock - a crawl may have been prematurely terminated, likely by the user's election.  write.lock prevents us from writing to the index on subsequent crawls.
            if (File.Exists(Path.Combine(luceneDotNetIndexDirectory, "write.lock")))
            {
                File.Delete(Path.Combine(luceneDotNetIndexDirectory, "write.lock"));
            }

            //delete the lock - a crawl may have been prematurely terminated, likely by the user's election.  write.lock prevents us from writing to the index on subsequent crawls.
            if (File.Exists(Path.Combine(currentCrawlDirectory, "write.lock")))
            {
                File.Delete(Path.Combine(currentCrawlDirectory, "write.lock"));
            }

            ManageIndexes();

            TearDownIndexWriter();

            _indexFiles    = bool.Parse(settings["IndexFiles"]);
            _indexImages   = bool.Parse(settings["IndexImages"]);
            _indexWebPages = bool.Parse(settings["IndexWebPages"]);

            //check to see if we have requested to rebuild the index.
            if (bool.Parse(settings["RebuildIndexOnLoad"]))
            {
                string tempDirectory       = Path.Combine(luceneDotNetIndexDirectory, "Temp");
                int    fileIDLowerBound    = int.Parse(settings["FileIDLowerBound"]);
                int    fileIDUpperBound    = int.Parse(settings["FileIDUpperBound"]);
                int    imageIDLowerBound   = int.Parse(settings["ImageIDLowerBound"]);
                int    imageIDUpperBound   = int.Parse(settings["ImageIDUpperBound"]);
                int    webPageIDLowerBound = int.Parse(settings["WebPageIDLowerBound"]);
                int    webPageIDUpperBound = int.Parse(settings["WebPageIDUpperBound"]);

                RebuildIndexes(tempDirectory, fileIDLowerBound, fileIDUpperBound, imageIDLowerBound, imageIDUpperBound, webPageIDLowerBound, webPageIDUpperBound);

                TearDownIndexWriter();
            }

            //switch back to the _current
            if (autoCommit)
            {
                //NOTE: autoCommit was disabled in Lucene.net 2.4.  The threads now check when to Commit();
                _autoCommit         = true;
                _autoCommitLock     = new object();
                _lastCommitDateTime = DateTime.Now;
                //_indexWriter = new IndexWriter(_luceneDotNetIndexDirectory, true, _standardAnalyzer, false);
                _indexWriter = new IndexWriter(_luceneDotNetIndexDirectory, _standardAnalyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);
            }
            else
            {
                _indexWriter = new IndexWriter(_currentCrawlDirectory, _standardAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            }

            SetIndexWriterDefaults();

            _indexSearcher = new IndexSearcher(_luceneDotNetIndexDirectory, true);
        }
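For reference, a settings dictionary covering every key the method reads might look like this (values are illustrative; the ID-bound keys are only parsed when RebuildIndexOnLoad is true):
            // Illustrative settings for AssignSettings; keys match the reads above.
            var settings = new Dictionary<string, string>
            {
                ["CheckIndexes"] = "true",
                ["AutoCommit"] = "false",
                ["LuceneDotNetIndexDirectory"] = @"C:\Crawler\Index",
                ["IndexFiles"] = "true",
                ["IndexImages"] = "false",
                ["IndexWebPages"] = "true",
                ["RebuildIndexOnLoad"] = "false",
                ["FileIDLowerBound"] = "0",
                ["FileIDUpperBound"] = "0",
                ["ImageIDLowerBound"] = "0",
                ["ImageIDUpperBound"] = "0",
                ["WebPageIDLowerBound"] = "0",
                ["WebPageIDUpperBound"] = "0",
            };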
Example #36
        /// <summary>
        /// Indexes the data from the given <see cref="IDictionary"/>. </summary>
        /// <param name="dict"> Dictionary to index </param>
        /// <param name="config"> <see cref="IndexWriterConfig"/> to use </param>
        /// <param name="fullMerge"> whether or not the spellcheck index should be fully merged </param>
        /// <exception cref="ObjectDisposedException"> if the <see cref="SpellChecker"/> is already disposed </exception>
        /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception>
        public void IndexDictionary(IDictionary dict, IndexWriterConfig config, bool fullMerge)
        {
            lock (modifyCurrentIndexLock)
            {
                EnsureOpen();
                Directory dir = this.spellIndex;
                using (var writer = new IndexWriter(dir, config))
                {
                    IndexSearcher     indexSearcher = ObtainSearcher();
                    IList <TermsEnum> termsEnums    = new List <TermsEnum>();

                    IndexReader reader = searcher.IndexReader;
                    if (reader.MaxDoc > 0)
                    {
                        foreach (AtomicReaderContext ctx in reader.Leaves)
                        {
                            Terms terms = ctx.AtomicReader.GetTerms(F_WORD);
                            if (terms != null)
                            {
                                termsEnums.Add(terms.GetIterator(null));
                            }
                        }
                    }

                    bool isEmpty = termsEnums.Count == 0;

                    try
                    {
                        IBytesRefIterator iter = dict.GetEntryIterator();
                        BytesRef          currentTerm;

                        while ((currentTerm = iter.Next()) != null)
                        {
                            string word = currentTerm.Utf8ToString();
                            int    len  = word.Length;
                            if (len < 3)
                            {
                                continue; // too short we bail but "too long" is fine...
                            }

                            if (!isEmpty)
                            {
                                foreach (TermsEnum te in termsEnums)
                                {
                                    if (te.SeekExact(currentTerm))
                                    {
                                        goto termsContinue;
                                    }
                                }
                            }

                            // ok index the word
                            var doc = CreateDocument(word, GetMin(len), GetMax(len));
                            writer.AddDocument(doc);
termsContinue:
                            ;
                        }
                    }
                    finally
                    {
                        ReleaseSearcher(indexSearcher);
                    }
                    if (fullMerge)
                    {
                        writer.ForceMerge(1);
                    }
                }
                // TODO: this isn't that great, maybe in the future SpellChecker should take
                // IWC in its ctor / keep its writer open?

                // also re-open the spell index to see our own changes when the next suggestion
                // is fetched:
                SwapSearcher(dir);
            }
        }
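A minimal usage sketch for IndexDictionary, feeding it the terms of an existing index field through LuceneDictionary (the paths, the field name, and the analyzer choice are assumptions):
            // Illustrative driver: build the spellcheck index from the "title"
            // field of an existing index, then ask for suggestions.
            using (var spellDir = FSDirectory.Open(new DirectoryInfo("spellIndex")))
            using (var reader = DirectoryReader.Open(FSDirectory.Open(new DirectoryInfo("mainIndex"))))
            {
                var spellChecker = new SpellChecker(spellDir);
                var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48));
                spellChecker.IndexDictionary(new LuceneDictionary(reader, "title"), config, true);
                string[] suggestions = spellChecker.SuggestSimilar("chocolat", 5);
            }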
Example #37
        public virtual void  TestKnownSetOfDocuments()
        {
            System.String test1 = "eating chocolate in a computer lab";                                             //6 terms
            System.String test2 = "computer in a computer lab";                                                     //5 terms
            System.String test3 = "a chocolate lab grows old";                                                      //5 terms
            System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
            System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
            test4Map["chocolate"] = 3;
            test4Map["lab"]       = 2;
            test4Map["eating"]    = 1;
            test4Map["computer"]  = 1;
            test4Map["with"]      = 1;
            test4Map["a"]         = 1;
            test4Map["colored"]   = 1;
            test4Map["in"]        = 1;
            test4Map["an"]        = 1;
            test4Map["computer"]  = 1;
            test4Map["old"]       = 1;

            Document testDoc1 = new Document();

            SetupDoc(testDoc1, test1);
            Document testDoc2 = new Document();

            SetupDoc(testDoc2, test2);
            Document testDoc3 = new Document();

            SetupDoc(testDoc3, test3);
            Document testDoc4 = new Document();

            SetupDoc(testDoc4, test4);

            Directory dir = new MockRAMDirectory();

            try
            {
                IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
                Assert.IsTrue(writer != null);
                writer.AddDocument(testDoc1);
                writer.AddDocument(testDoc2);
                writer.AddDocument(testDoc3);
                writer.AddDocument(testDoc4);
                writer.Close();
                IndexSearcher knownSearcher = new IndexSearcher(dir);
                TermEnum      termEnum      = knownSearcher.reader_ForNUnit.Terms();
                TermDocs      termDocs      = knownSearcher.reader_ForNUnit.TermDocs();
                //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);

                Similarity sim = knownSearcher.GetSimilarity();
                while (termEnum.Next() == true)
                {
                    Term term = termEnum.Term();
                    //System.out.println("Term: " + term);
                    termDocs.Seek(term);
                    while (termDocs.Next())
                    {
                        int docId = termDocs.Doc();
                        int freq  = termDocs.Freq();
                        //System.out.println("Doc Id: " + docId + " freq " + freq);
                        TermFreqVector vector = knownSearcher.reader_ForNUnit.GetTermFreqVector(docId, "field");
                        float          tf     = sim.Tf(freq);
                        float          idf    = sim.Idf(term, knownSearcher);
                        //float qNorm = sim.queryNorm()
                        //This is fine since we don't have stop words
                        float lNorm = sim.LengthNorm("field", vector.GetTerms().Length);
                        //float coord = sim.coord()
                        //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
                        Assert.IsTrue(vector != null);
                        System.String[] vTerms = vector.GetTerms();
                        int[]           freqs  = vector.GetTermFrequencies();
                        for (int i = 0; i < vTerms.Length; i++)
                        {
                            if (term.Text().Equals(vTerms[i]))
                            {
                                Assert.IsTrue(freqs[i] == freq);
                            }
                        }
                    }
                    //System.out.println("--------");
                }
                Query      query = new TermQuery(new Term("field", "chocolate"));
                ScoreDoc[] hits  = knownSearcher.Search(query, null, 1000).scoreDocs;
                //doc 3 should be the first hit b/c it is the shortest match
                Assert.IsTrue(hits.Length == 3);
                float score = hits[0].score;

                /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
                 * System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
                 * System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " +  hits.doc(2).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
                Assert.IsTrue(hits[0].doc == 2);
                Assert.IsTrue(hits[1].doc == 3);
                Assert.IsTrue(hits[2].doc == 0);
                TermFreqVector vector2 = knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, "field");
                Assert.IsTrue(vector2 != null);
                //System.out.println("Vector: " + vector);
                System.String[] terms  = vector2.GetTerms();
                int[]           freqs2 = vector2.GetTermFrequencies();
                Assert.IsTrue(terms != null && terms.Length == 10);
                for (int i = 0; i < terms.Length; i++)
                {
                    System.String term = terms[i];
                    //System.out.println("Term: " + term);
                    int freq = freqs2[i];
                    Assert.IsTrue(test4.IndexOf(term) != -1);
                    System.Int32 freqInt = -1;
                    try
                    {
                        freqInt = (System.Int32)test4Map[term];
                    }
                    catch (Exception)
                    {
                        Assert.Fail("unexpected term: " + term);
                    }
                    Assert.IsTrue(freqInt == freq);
                }
                SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
                knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, mapper);
                System.Collections.Generic.SortedDictionary <object, object> vectorEntrySet = mapper.GetTermVectorEntrySet();
                Assert.IsTrue(vectorEntrySet.Count == 10, "mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.Count + " is not: " + 10);
                TermVectorEntry last = null;
                foreach (TermVectorEntry tve in vectorEntrySet.Keys)
                {
                    if (tve != null && last != null)
                    {
                        Assert.IsTrue(last.GetFrequency() >= tve.GetFrequency(), "terms are not properly sorted");
                        System.Int32 expectedFreq = (System.Int32)test4Map[tve.GetTerm()];
                        //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
                        Assert.IsTrue(tve.GetFrequency() == 2 * expectedFreq, "Frequency is not correct: got " + tve.GetFrequency() + ", expected " + (2 * expectedFreq));
                    }
                    last = tve;
                }

                FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
                knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].doc, fieldMapper);
                System.Collections.IDictionary map = fieldMapper.GetFieldToTerms();
                Assert.IsTrue(map.Count == 2, "map Size: " + map.Count + " is not: " + 2);
                vectorEntrySet = (System.Collections.Generic.SortedDictionary <Object, Object>)map["field"];
                Assert.IsTrue(vectorEntrySet != null, "vectorEntrySet is null and it shouldn't be");
                Assert.IsTrue(vectorEntrySet.Count == 10, "vectorEntrySet Size: " + vectorEntrySet.Count + " is not: " + 10);
                knownSearcher.Close();
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                Assert.Fail("unexpected IOException: " + e.Message);
            }
        }
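        // SetupDoc is not shown in this listing. A minimal sketch consistent with
        // the assertions above (the FieldSortedTermVectorMapper check expects two
        // vectored fields carrying identical text); this is a hypothetical
        // reconstruction, not the original helper:
        private void SetupDoc(Document doc, System.String text)
        {
            doc.Add(new Field("field2", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.Add(new Field("field", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
        }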
Example #38
        public void IndexSelectedMovies(ISet <string> movieIds)
        {
            StandardAnalyzer analyzer = null;
            IndexWriter      writer   = null;

            try
            {
                analyzer = new StandardAnalyzer(Version.LUCENE_30);
                writer   = new IndexWriter(_dirLocation, analyzer,
                                           IndexWriter.MaxFieldLength.UNLIMITED);

                var tableManager = new TableManager();

                var movieList = tableManager.GetMoviesByid(GenerateListFromSet(movieIds));

                foreach (var id in movieIds)
                {
                    if (movieList.ContainsKey(id))
                    {
                        Trace.TraceInformation("Adding {0} to the index", id);

                        var movieEntity = movieList[id];

                        // delete entry if exists
                        var searchQuery = new TermQuery(new Term(Constants.Constants.Field_Id, id));
                        writer.DeleteDocuments(searchQuery);

                        // add to index again
                        var doc = new Document();
                        doc.Add(new Field(Constants.Constants.Field_Id, movieEntity.MovieId, Field.Store.YES,
                                          Field.Index.NOT_ANALYZED));
                        doc.Add(new Field(Constants.Constants.Field_Name, movieEntity.Name, Field.Store.YES,
                                          Field.Index.NOT_ANALYZED));
                        doc.Add(new Field(Constants.Constants.Field_AltNames, movieEntity.AltNames, Field.Store.NO, Field.Index.ANALYZED));

                        doc.Add(new Field(Constants.Constants.Field_Actors, movieEntity.Actors, Field.Store.NO,
                                          Field.Index.ANALYZED));
                        doc.Add(new Field(Constants.Constants.Field_Directors, movieEntity.Directors, Field.Store.YES,
                                          Field.Index.ANALYZED));
                        doc.Add(new Field(Constants.Constants.Field_MusicDirectors, movieEntity.MusicDirectors,
                                          Field.Store.YES, Field.Index.ANALYZED));
                        doc.Add(new Field(Constants.Constants.Field_Name, movieEntity.Name, Field.Store.YES,
                                          Field.Index.ANALYZED));
                        doc.Add(new Field(Constants.Constants.Field_Producers, movieEntity.Producers, Field.Store.YES,
                                          Field.Index.ANALYZED));
                        doc.Add(new Field(Constants.Constants.Field_MovieSynopsis, movieEntity.Synopsis, Field.Store.YES,
                                          Field.Index.ANALYZED));

                        writer.AddDocument(doc);
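
                        // Sketch: in Lucene.NET 3.x the delete-then-add sequence
                        // above can also be expressed as a single atomic call:
                        // writer.UpdateDocument(new Term(Constants.Constants.Field_Id, id), doc);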
                    }
                    else
                    {
                        Trace.TraceWarning("movie {0} not present in db", id);
                    }
                }
            }
            catch (Exception err)
            {
                Trace.TraceError("Failed to build index {0}", err);
            }
            finally
            {
                if (analyzer != null)
                {
                    analyzer.Close();
                }
                if (writer != null)
                {
                    writer.Dispose();
                }
            }
        }
 /// <summary>
 /// Open the <see cref="DirectoryReader"/> from this <see cref="IndexWriter"/>.
 /// </summary>
 protected virtual DirectoryReader OpenIndexReader(IndexWriter writer)
 {
     return DirectoryReader.Open(writer, false);
 }
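 // Usage sketch (assumes an open IndexWriter named 'writer'): the reader sees
 // uncommitted changes because it is opened directly from the writer (NRT);
 // applyAllDeletes is false above, trading delete visibility for reopen speed.
 // using (DirectoryReader nrtReader = OpenIndexReader(writer))
 // {
 //     var searcher = new IndexSearcher(nrtReader);
 //     // ... search against in-memory segments without calling Commit() ...
 // }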
Example #40
 public void DeleteContent(IndexWriter indexWriter, Guid id)
 {
     indexWriter.DeleteDocuments(new Term(SearchFieldName.Id, id.ToFieldValue()));
 }
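 // Usage sketch (hypothetical names): deletes are buffered by the writer, so
 // commit (or reopen an NRT reader) before expecting them to be visible.
 // DeleteContent(indexWriter, documentId);
 // indexWriter.Commit();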
Example #41
        public async Task <RebuildIndicesResponse> Handle(RebuildIndicesRequest request, CancellationToken cancellationToken)
        {
            _logger.LogDebug("RebuildIndicesResponseHandler started.");
            cancellationToken.ThrowIfCancellationRequested();

            IndexWriter writer = null;

            Lucene.Net.Store.Azure.AzureDirectory azureDirectory = null;
            DateTimeOffset lastSyncPoint    = DateTimeOffset.MinValue;
            DateTimeOffset currentSyncPoint = DateTimeOffset.Now;
            int?           updatedCount     = null;
            int?           deletedCount     = null;

            try
            {
                // Ensures index backwards compatibility
                var AppLuceneVersion = LuceneVersion.LUCENE_48;

                //Azure configuration
                var accountSAS     = new Microsoft.Azure.Storage.Auth.StorageCredentials(AzureLuceneConfiguration.SASToken);
                var accountWithSAS = new Microsoft.Azure.Storage.CloudStorageAccount(accountSAS, AzureLuceneConfiguration.AzureStorageAccountName, endpointSuffix: null, useHttps: true);

                var tempLocation = AzureLuceneConfiguration.TempDirectory ?? "temp";
                _logger.LogTrace("tempLocation: {0}", tempLocation);

                azureDirectory = new Lucene.Net.Store.Azure.AzureDirectory(accountWithSAS, tempLocation, containerName: AzureLuceneConfiguration.Container);
                //ensure RAMDirectory
                azureDirectory.CacheDirectory = new RAMDirectory();

                //create an analyzer to process the text
                var analyzer = new StandardAnalyzer(AppLuceneVersion);

                //create an index writer
                var indexConfig = new IndexWriterConfig(AppLuceneVersion, analyzer);

                writer = new IndexWriter(azureDirectory, indexConfig);
                _logger.LogTrace("IndexWriter is initialized");

                if (request.FullRebuild)
                {
                    _logger.LogInformation("Full Rebuild is requested. Deleting indices");
                    writer.DeleteAll();
                    writer.Commit();
                    _logger.LogTrace("Full Rebuild is committed.");
                }



                using (var dbConnection = await _SQLservice.GetConnection(cancellationToken))
                {
                    SqlCommand cmd;
                    if (!request.FullRebuild)
                    {
                        //we need last sync point only if it is not full rebuild
                        var dbCommand = @"SELECT TOP 1 LastSyncPoint FROM [dbo].[FTS_Config]";

                        cmd = new SqlCommand(dbCommand, dbConnection);
                        try
                        {
                            var untyped = await _SQLservice.ExecuteScalarWithRetryAsync(cmd, cancellationToken);

                            var lastSyncPointNullable = untyped as DateTimeOffset?;

                            if (lastSyncPointNullable.HasValue)
                            {
                                lastSyncPoint = lastSyncPointNullable.Value;
                            }

                            _logger.LogDebug("Last sync point is {0}", lastSyncPointNullable.HasValue ? lastSyncPointNullable.Value.ToString() : "'never'");
                        }
                        catch (Exception ex)
                        {
                            _logger.LogError(ex, "unexpected failure to acquire LastSyncPoint from database");
                            throw;
                        }
                    }
                    else
                    {
                        lastSyncPoint = DateTimeOffset.MinValue;
                    }



                    //determine number of records that will need to be processed

                    var dbCountCommand = @"SELECT COUNT(Id) from [dbo].[Test_Data] WHERE UpdatedAt >= @lastSyncPoint AND UpdatedAt < @currentSyncPoint AND DeletedAt IS NULL";
                    cmd = new SqlCommand(dbCountCommand, dbConnection);
                    cmd.Parameters.Add("@lastSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                    cmd.Parameters["@lastSyncPoint"].Value = lastSyncPoint;
                    cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                    cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;

                    try
                    {
                        var untyped = await _SQLservice.ExecuteScalarWithRetryAsync(cmd, cancellationToken);

                        updatedCount = untyped as int?;
                        _logger.LogDebug("Expected number of updated documents {0}", updatedCount.HasValue ? updatedCount.Value.ToString() : "'none'");
                    }
                    catch (Exception ex)
                    {
                        _logger.LogError(ex, "unexpected failure to acquire number of documents to be updated from database");
                        throw;
                    }



                    //working on deleted documents

                    if (!request.FullRebuild)
                    {
                        //also need to remove "Deleted" documents. Only if not full rebuild of indices
                        var dbDeletedCountCommand = @"SELECT COUNT(Id) from [dbo].[Test_Data] WHERE DeletedAt >= @lastSyncPoint AND DeletedAt<=@currentSyncPoint AND DeletedAt IS NOT NULL";
                        cmd = new SqlCommand(dbDeletedCountCommand, dbConnection);
                        cmd.Parameters.Add("@lastSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                        cmd.Parameters["@lastSyncPoint"].Value = lastSyncPoint;
                        cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                        cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;
                        try
                        {
                            var untyped = await _SQLservice.ExecuteScalarWithRetryAsync(cmd, cancellationToken);

                            deletedCount = untyped as int?;
                            _logger.LogDebug("Expected number of deleted documents {0}", deletedCount.HasValue ? updatedCount.Value.ToString() : "'none'");
                        }
                        catch (Exception ex)
                        {
                            _logger.LogError(ex, "unexpected failure to acquire 'number of documents to be delete from indicies' from database");
                            throw;
                        }
                    }
                }
                var atLeastOneUpdate = false;
                if (updatedCount.HasValue && updatedCount.Value > 0)
                {
                    _logger.LogDebug("Expected number of updated documents: {0}", updatedCount.Value);
                    //Start updating 'Updated records'
                    await UpdateIndicesWithAddedDocuments(lastSyncPoint, currentSyncPoint, updatedCount.Value, writer, cancellationToken);

                    atLeastOneUpdate = true;
                }
                else
                {
                    _logger.LogDebug("Expected number of updated documents: none ");
                }


                if (deletedCount.HasValue && deletedCount.Value > 0)
                {
                    _logger.LogDebug("Expected number of deleted documents: {0}", deletedCount.Value);
                    await UpdateIndicesWithDeletedDocuments(lastSyncPoint, currentSyncPoint, deletedCount.Value, writer, cancellationToken);

                    atLeastOneUpdate = true;
                }
                else
                {
                    _logger.LogDebug("Expected number of updated documents: none ");
                }

                if (atLeastOneUpdate)
                {
                    _logger.LogDebug("Expected number of updated documents: none ");
                    _luceneReaderService.Evict();
                    writer.Flush(triggerMerge: true, applyAllDeletes: true);
                    _logger.LogInformation("Indexes are updated");
                }

                //update LastSyncPoint
                using (var dbConnection = await _SQLservice.GetConnection(cancellationToken))
                {
                    var dbCommand = @"UPDATE [dbo].[FTS_Config] SET LastSyncPoint = @currentSyncPoint";

                    var cmd = new SqlCommand(dbCommand, dbConnection);
                    cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                    cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;
                    try
                    {
                        await _SQLservice.ExecuteNonQueryWithRetryAsync(cmd, cancellationToken);

                        _logger.LogDebug("Last sync point is set to {0}", currentSyncPoint);
                    }
                    catch (Exception ex)
                    {
                        _logger.LogError(ex, "unexpected failure to update LastSyncPoint in database");
                        throw;
                    }
                }


                var result = new RebuildIndicesResponse
                {
                    IsValid          = true,
                    Success          = true,
                    NumberOfUpdates  = updatedCount,
                    NumberOfDeletes  = deletedCount,
                    CurrentSyncPoint = currentSyncPoint
                };

                return(result);
            }
            catch (LockObtainFailedException)
            {
                var result = new RebuildIndicesResponse();
                result.IsValid = false;
                result.Errors  = new List <string>();
                result.Errors.Add("Failed to lock full text search index file. Probaly there is another job is running. Please try again later.");
                return(result);
            }
            catch (Exception ex)
            {
                var result = new RebuildIndicesResponse();
                result.IsValid = false;
                result.Errors  = new List <string>();
                result.Errors.Add("Unexpected error occured: " + ex.Message);
                return(result);
            }
            finally
            {
                if (writer != null)
                {
                    writer.Dispose();
                }
                if (azureDirectory != null)
                {
                    azureDirectory.Dispose();
                }
            }
        }
 public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
 {
     MergePolicy.OneMerge merge = null;
     while ((merge = writer.NextMerge) != null)
     {
         if (VERBOSE)
         {
             Console.WriteLine("executing merge " + merge.SegString(writer.Directory));
         }
         writer.Merge(merge);
     }
 }
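 // Note: this loop drains writer.NextMerge and runs each merge on the calling
 // thread, which is effectively the serial strategy SerialMergeScheduler uses.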
Example #43
        private void ProcessQueue(DataTable q, string indexPath)
        {
            rowsProcessed = 0;
            rowsToProcess = q.Rows.Count;

            // first process deletes with reader
            try
            {
                IndexReader reader = IndexReader.Open(indexPath);

                foreach (DataRow row in q.Rows)
                {
                    Term term = new Term("Key", row["ItemKey"].ToString());
                    try
                    {
                        reader.DeleteDocuments(term);
                        log.Debug("reader.DeleteDocuments(term) for Key " + row["ItemKey"].ToString());
                    }
                    catch (Exception ge)
                    {
                        // TODO: monitor what real exceptions if any occur and then
                        // change this catch to catch only the expected ones
                        // instead of non specific exception
                        log.Error(ge);
                    }

                    bool removeOnly = Convert.ToBoolean(row["RemoveOnly"]);
                    if (removeOnly)
                    {
                        Int64 rowId = Convert.ToInt64(row["RowId"]);
                        IndexingQueue.Delete(rowId);
                    }


                    if (DateTime.UtcNow > nextStatusUpdateTime)
                    {
                        // don't mark as complete because there may be more qu items
                        //for different index paths in a multi site installation
                        bool markAsComplete = false;
                        ReportStatus(markAsComplete);
                    }
                }

                reader.Close();
            }
            catch (IOException ex)
            {
                log.Info("IndexReader swallowed exception; this is not unexpected when the search index is being built or rebuilt", ex);
                errorCount += 1;
            }
            catch (TypeInitializationException ex)
            {
                log.Info("IndexReader swallowed exception", ex);
                errorCount += 1;
            }


            // next add items with writer
            IndexWriter indexWriter = GetWriter(indexPath);

            if (indexWriter == null)
            {
                log.Error("failed to get IndexWriter for path: " + indexPath);
                errorCount += 1;
                return;
            }

            foreach (DataRow row in q.Rows)
            {
                bool removeOnly = Convert.ToBoolean(row["RemoveOnly"]);
                if (!removeOnly)
                {
                    try
                    {
                        IndexItem indexItem
                            = (IndexItem)SerializationHelper.DeserializeFromString(typeof(IndexItem), row["SerializedItem"].ToString());

                        Document doc = GetDocument(indexItem);
                        WriteToIndex(doc, indexWriter);
                        log.Debug("called WriteToIndex(doc, indexWriter) for key " + indexItem.Key);
                        Int64 rowId = Convert.ToInt64(row["RowId"]);
                        IndexingQueue.Delete(rowId);
                    }
                    catch (Exception ex)
                    {
                        log.Error(ex);
                    }
                }

                if (DateTime.UtcNow > nextStatusUpdateTime)
                {
                    // don't mark as complete because there may be more qu items
                    //for different index paths in a multi site installation
                    bool markAsComplete = false;
                    ReportStatus(markAsComplete);
                }
            }

            try
            {
                indexWriter.Optimize();
            }
            catch (IOException ex)
            {
                log.Error(ex);
            }

            try
            {
                indexWriter.Close();
            }
            catch (IOException ex)
            {
                log.Error(ex);
            }
        }
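        // GetWriter is not shown here. A minimal sketch consistent with the
        // null-check above (hypothetical reconstruction; the real helper likely
        // also handles lock timeouts and logging):
        private IndexWriter GetWriter(string indexPath)
        {
            try
            {
                bool createIndex = !IndexReader.IndexExists(indexPath);
                return new IndexWriter(indexPath, new StandardAnalyzer(), createIndex);
            }
            catch (IOException ex)
            {
                log.Error(ex);
                return null;
            }
        }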
 public MyMergeThread(TestMergeSchedulerExternal.MyMergeScheduler outerInstance, IndexWriter writer, MergePolicy.OneMerge merge)
     : base(outerInstance, writer, merge)
 {
     this.OuterInstance = outerInstance;
     outerInstance.OuterInstance.MergeThreadCreated = true;
 }
Example #45
        private async Task UpdateIndicesWithAddedDocuments(DateTimeOffset lastSyncPoint, DateTimeOffset currentSyncPoint, int numberOfUpdates, IndexWriter indexWriter, CancellationToken cancellationToken)
        {
            using (var dbConnection = await _SQLservice.GetConnection(cancellationToken))
            {
                var startRow = 1;

                var dbCountCommand = @"SELECT Id, ISNULL(Name,''), ISNULL(Content,''), UpdatedAt from [dbo].[Test_Data] 
                        WHERE UpdatedAt >= @lastSyncPoint AND UpdatedAt < @currentSyncPoint AND DeletedAt IS NULL
                        ORDER BY Id ASC OFFSET @StartRow - 1 ROWS FETCH NEXT @RowsPerPage ROWS ONLY ";


                while (numberOfUpdates >= startRow)
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    var cmd = new SqlCommand(dbSelectCommand, dbConnection);
                    cmd.Parameters.Add("@lastSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                    cmd.Parameters["@lastSyncPoint"].Value = lastSyncPoint;
                    cmd.Parameters.Add("@currentSyncPoint", System.Data.SqlDbType.DateTimeOffset);
                    cmd.Parameters["@currentSyncPoint"].Value = currentSyncPoint;
                    cmd.Parameters.Add("@StartRow", System.Data.SqlDbType.Int);
                    cmd.Parameters["@StartRow"].Value = startRow;
                    cmd.Parameters.Add("@RowsPerPage", System.Data.SqlDbType.Int);
                    cmd.Parameters["@RowsPerPage"].Value = PageSize;

                    try
                    {
                        using (var reader = await _SQLservice.ExecuteReaderWithRetryAsync(cmd, System.Data.CommandBehavior.SequentialAccess, cancellationToken))
                        {
                            while (await reader.ReadAsync())
                            {
                                var document_id = await reader.GetFieldValueAsync <int>(0);

                                var document_name = await reader.GetFieldValueAsync <string>(1);

                                var document_content = await reader.GetFieldValueAsync <string>(2);

                                var document_updatedAt = await reader.GetFieldValueAsync <DateTimeOffset>(3);

                                var updatedAtAsNumber = int.Parse(document_updatedAt.ToString("yyyyMMdd"));

                                var searchDocument = new SearchDocument()
                                {
                                    DocumentID = document_id.ToString(),
                                    Name       = document_name,
                                    Content    = document_content,
                                    UpdatedAt  = updatedAtAsNumber
                                };

                                var doc = new Lucene.Net.Documents.Document
                                {
                                    // StringField indexes but doesn't tokenize
                                    new Lucene.Net.Documents.StringField("doc_id", searchDocument.DocumentID, Lucene.Net.Documents.Field.Store.YES),
                                    new Lucene.Net.Documents.StringField("name", searchDocument.Name, Lucene.Net.Documents.Field.Store.YES),
                                    new Lucene.Net.Documents.TextField("content", searchDocument.Content, Lucene.Net.Documents.Field.Store.YES),
                                    new Lucene.Net.Documents.Int32Field("updated", searchDocument.UpdatedAt, Lucene.Net.Documents.Field.Store.YES)
                                };

                                indexWriter.AddDocument(doc);
                                startRow++;
                            }
                        }


                        _logger.LogDebug("Processed {0} records (of {1} total) for update", (startRow - 1), numberOfUpdates);
                    }
                    catch (Exception ex)
                    {
                        _logger.LogError(ex, "unexpected failure during indexes update");
                        throw;
                    }
                }

                _logger.LogInformation("Processed {0} records for update of FTS indices. Completed.", (startRow - 1));
            }
        }
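        // Query-side sketch: the yyyyMMdd integer stored in "updated" above
        // supports numeric range filtering (field name as indexed above; the
        // range values are illustrative):
        // var marchDocs = Lucene.Net.Search.NumericRangeQuery.NewInt32Range(
        //     "updated", 20240301, 20240331, minInclusive: true, maxInclusive: true);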
Example #46
 public abstract void AddToLuceneIndex(T sampleData, IndexWriter writer);
Example #48
 protected internal override void Publish(IndexWriter writer)
 {
     Debug.Assert(!m_published, "ticket was already published - cannot publish twice");
     m_published = true;
     FinishFlush(writer, segment, m_frozenUpdates);
 }
        public virtual void TestCRTReopen()
        {
            //test behaving badly

            //should be high enough
            int maxStaleSecs = 20;

            //build filler data just to store it
            string s = "        abcdefghijklmnopqrstuvwxyz     ";

            char[]        chars   = s.ToCharArray();
            StringBuilder builder = new StringBuilder(2048);

            for (int i = 0; i < 2048; i++)
            {
                builder.Append(chars[Random().Next(chars.Length)]);
            }
            string content = builder.ToString();

            SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
            Directory         dir      = new NRTCachingDirectory(NewFSDirectory(CreateTempDir("nrt")), 5, 128);
            IndexWriterConfig config   = new IndexWriterConfig(
#pragma warning disable 612, 618
                Version.LUCENE_46,
#pragma warning restore 612, 618
                new MockAnalyzer(Random()));

            config.SetIndexDeletionPolicy(sdp);
            config.SetOpenMode(OpenMode.CREATE_OR_APPEND);
            IndexWriter         iw  = new IndexWriter(dir, config);
            SearcherManager     sm  = new SearcherManager(iw, true, new SearcherFactory());
            TrackingIndexWriter tiw = new TrackingIndexWriter(iw);
            ControlledRealTimeReopenThread <IndexSearcher> controlledRealTimeReopenThread =
                new ControlledRealTimeReopenThread <IndexSearcher>(tiw, sm, maxStaleSecs, 0);

            controlledRealTimeReopenThread.SetDaemon(true);
            controlledRealTimeReopenThread.Start();

            IList <ThreadClass> commitThreads = new List <ThreadClass>();

            for (int i = 0; i < 500; i++)
            {
                if (i > 0 && i % 50 == 0)
                {
                    ThreadClass commitThread = new RunnableAnonymousInnerClassHelper(this, sdp, dir, iw);
                    commitThread.Start();
                    commitThreads.Add(commitThread);
                }
                Document d = new Document();
                d.Add(new TextField("count", i + "", Field.Store.NO));
                d.Add(new TextField("content", content, Field.Store.YES));
                long start = Environment.TickCount;
                long l     = tiw.AddDocument(d);
                controlledRealTimeReopenThread.WaitForGeneration(l);
                long wait = Environment.TickCount - start;
                assertTrue("waited too long for generation " + wait, wait < (maxStaleSecs * 1000));
                IndexSearcher searcher = sm.Acquire();
                TopDocs       td       = searcher.Search(new TermQuery(new Term("count", i + "")), 10);
                sm.Release(searcher);
                assertEquals(1, td.TotalHits);
            }

            foreach (ThreadClass commitThread in commitThreads)
            {
                commitThread.Join();
            }

            controlledRealTimeReopenThread.Dispose();
            sm.Dispose();
            iw.Dispose();
            dir.Dispose();
        }
Example #50
 public static UmbracoExamineSearcher GetUmbracoSearcher(IndexWriter writer)
 {
     return(new UmbracoExamineSearcher(writer, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29)));
 }
Example #51
        public virtual void Test_Directory() // LUCENENET specific - name collides with property of LuceneTestCase
        {
            Store.Directory indexDir = NewDirectory();
            Store.Directory taxoDir  = NewDirectory();
            IndexWriter     w        = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            var             tw       = new DirectoryTaxonomyWriter(taxoDir);

            // first empty commit
            w.Commit();
            tw.Commit();
            var          mgr    = new SearcherTaxonomyManager(indexDir, taxoDir, null);
            FacetsConfig config = new FacetsConfig();

            config.SetMultiValued("field", true);
            AtomicBoolean stop = new AtomicBoolean();

            // How many unique facets to index before stopping:
            //int ordLimit = TestNightly ? 100000 : 6000;
            // LUCENENET specific: 100000 facets takes about 2-3 hours. To keep it under
            // the 1 hour free limit of Azure DevOps, this was reduced to 30000.
            int ordLimit = TestNightly ? 30000 : 6000;

            var indexer = new IndexerThread(w, config, tw, mgr, ordLimit, stop);

            indexer.Start();

            try
            {
                while (!stop)
                {
                    SearcherAndTaxonomy pair = mgr.Acquire();
                    try
                    {
                        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
                        FacetsCollector sfc = new FacetsCollector();
                        pair.Searcher.Search(new MatchAllDocsQuery(), sfc);
                        Facets      facets = GetTaxonomyFacetCounts(pair.TaxonomyReader, config, sfc);
                        FacetResult result = facets.GetTopChildren(10, "field");
                        if (pair.Searcher.IndexReader.NumDocs > 0)
                        {
                            //System.out.println(pair.taxonomyReader.getSize());
                            Assert.True(result.ChildCount > 0);
                            Assert.True(result.LabelValues.Length > 0);
                        }

                        //if (VERBOSE) {
                        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
                        //}
                    }
                    finally
                    {
                        mgr.Release(pair);
                    }
                }
            }
            finally
            {
                indexer.Join();
            }

            if (Verbose)
            {
                Console.WriteLine("TEST: now stop");
            }

            IOUtils.Dispose(mgr, tw, w, taxoDir, indexDir);
        }
        public virtual void TestConcurrency()
        {
            AtomicInt32 numDocs  = new AtomicInt32(AtLeast(10000));
            Directory   indexDir = NewDirectory();
            Directory   taxoDir  = NewDirectory();
            ConcurrentDictionary <string, string> values = new ConcurrentDictionary <string, string>();
            IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
            var         tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NewTaxoWriterCache(numDocs));

            ThreadJob[]  indexThreads = new ThreadJob[AtLeast(4)];
            FacetsConfig config       = new FacetsConfig();

            for (int i = 0; i < 10; i++)
            {
                config.SetHierarchical("l1." + i, true);
                config.SetMultiValued("l1." + i, true);
            }

            for (int i = 0; i < indexThreads.Length; i++)
            {
                indexThreads[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, values, iw, tw, config);
            }

            foreach (ThreadJob t in indexThreads)
            {
                t.Start();
            }
            foreach (ThreadJob t in indexThreads)
            {
                t.Join();
            }

            var tr = new DirectoryTaxonomyReader(tw);

            // +1 for root category
            if (values.Count + 1 != tr.Count)
            {
                foreach (string value in values.Keys)
                {
                    FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(value));
                    if (tr.GetOrdinal(label) == -1)
                    {
                        Console.WriteLine("FAIL: path=" + label + " not recognized");
                    }
                }
                fail("mismatch number of categories");
            }
            int[] parents = tr.ParallelTaxonomyArrays.Parents;
            foreach (string cat in values.Keys)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(cat));
                Assert.True(tr.GetOrdinal(cp) > 0, "category not found " + cp);
                int        level     = cp.Length;
                int        parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
                FacetLabel path      = null;
                for (int i = 0; i < level; i++)
                {
                    path = cp.Subpath(i + 1);
                    int ord = tr.GetOrdinal(path);
                    Assert.AreEqual(parentOrd, parents[ord], "invalid parent for cp=" + path);
                    parentOrd = ord; // next level should have this parent
                }
            }

            IOUtils.Dispose(tw, iw, tr, taxoDir, indexDir);
        }
Example #53
        /// <summary>
        /// Initializes a new instance of the <see cref="LuceneIndex" /> class.
        /// </summary>
        /// <param name="indexPath">The path to the directory that will contain the Lucene index files.</param>
        /// <param name="schema">The schema.</param>
        /// <exception cref="System.ArgumentNullException"></exception>
        public LuceneIndex(string indexPath, Schema schema)
        {
            if (String.IsNullOrWhiteSpace(indexPath))
                throw new ArgumentNullException(nameof(indexPath)); 
            if (schema == null)
                throw new ArgumentNullException(nameof(schema));

            IndexPath = indexPath;
            Schema = schema;

            if (System.IO.Directory.Exists(IndexPath))
            {
                if (Schema.IsDefault())
                    throw new InvalidOperationException($"There is an existing index on '{IndexPath}'.");
            }                
            else
            {
                System.IO.Directory.CreateDirectory(IndexPath);
            }                        

            _indexDirectory = new MMapDirectory(Paths.get(IndexPath));

            var taxonomyIndexPath = System.IO.Path.Combine(IndexPath, "taxonomy");
            if (!System.IO.Directory.Exists(taxonomyIndexPath))            
                System.IO.Directory.CreateDirectory(taxonomyIndexPath);

            _taxonomyDirectory = new MMapDirectory(Paths.get(taxonomyIndexPath));         
                           
            _compositeAnalyzer = new CompositeAnalyzer(Schema);            

            _ramBufferSizeMB = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.RAMBufferSizeMB"] ?? "128");

            var config = new IndexWriterConfig(_compositeAnalyzer)                            
                            .SetOpenMode(IndexWriterConfigOpenMode.CREATE_OR_APPEND)
                            .SetRAMBufferSizeMB(_ramBufferSizeMB)
                            .SetCommitOnClose(true);                            
            
            _indexWriter = new IndexWriter(_indexDirectory, config);
            _taxonomyWriter = new DirectoryTaxonomyWriter(_taxonomyDirectory, IndexWriterConfigOpenMode.CREATE_OR_APPEND);

            _searcherTaxonomyManager = new SearcherTaxonomyManager(_indexWriter, true, null, _taxonomyWriter);            
            _facetBuilder = new LuceneFacetBuilder(_taxonomyWriter);                        

            _refreshIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexSearcher.RefreshIntervalSeconds"] ?? "0.5");    
            _commitIntervalSeconds = Double.Parse(ConfigurationManager.AppSettings["IndexWriter.CommitIntervalSeconds"] ?? "60");
           
            _writeAllowedFlag = new ManualResetEventSlim(true);

            _refreshTimer = new Timer(o => Refresh(), null, TimeSpan.FromSeconds(_refreshIntervalSeconds), TimeSpan.FromSeconds(_refreshIntervalSeconds));
            _commitTimer = new Timer(o => Commit(), null, TimeSpan.FromSeconds(_commitIntervalSeconds), TimeSpan.FromSeconds(_commitIntervalSeconds));

        }
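        // Configuration sketch: the appSettings keys consumed above, with the
        // fallback defaults applied when a key is absent (values illustrative):
        //   IndexWriter.RAMBufferSizeMB          -> 128
        //   IndexSearcher.RefreshIntervalSeconds -> 0.5
        //   IndexWriter.CommitIntervalSeconds    -> 60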
 public ThreadAnonymousInnerClassHelper(TestConcurrentFacetedIndexing outerInstance, AtomicInt32 numDocs, ConcurrentDictionary <string, string> values, IndexWriter iw, Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter tw, FacetsConfig config)
 {
     this.outerInstance = outerInstance;
     this.numDocs       = numDocs;
     this.values        = values;
     this.iw            = iw;
     this.tw            = tw;
     this.config        = config;
 }
        public void TestSubclassConcurrentMergeScheduler()
        {
            MockDirectoryWrapper dir = NewMockDirectory();
            dir.FailOn(new FailOnlyOnMerge());

            Document doc = new Document();
            Field idField = NewStringField("id", "", Field.Store.YES);
            doc.Add(idField);

            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new MyMergeScheduler(this)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy()));
            LogMergePolicy logMP = (LogMergePolicy)writer.Config.MergePolicy;
            logMP.MergeFactor = 10;
            for (int i = 0; i < 20; i++)
            {
                writer.AddDocument(doc);
            }

            ((MyMergeScheduler)writer.Config.MergeScheduler).Sync();
            writer.Dispose();

            Assert.IsTrue(MergeThreadCreated);
            Assert.IsTrue(MergeCalled);
            Assert.IsTrue(ExcCalled);
            dir.Dispose();
        }
Example #56
        /// <summary>
        /// Creates (or recreates) an empty index at <c>indexDirectory</c> using the PanGu analyzer.
        /// </summary>
        public void IndexCreate()
        {
            analyzer = new PanGuAnalyzer();

            // create == true truncates any existing index; close the writer so the
            // empty commit is written and the write.lock is released
            IndexWriter writer = new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexDirectory)), analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            writer.Close();
        }
 public void TestCustomMergeScheduler()
 {
     // we don't really need to execute anything, just to make sure the custom MS
     // compiles. But ensure that it can be used as well, e.g., no other hidden
     // dependencies or something. Therefore, don't use any random API!
     Directory dir = new RAMDirectory();
     IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
     conf.SetMergeScheduler(new ReportingMergeScheduler());
     IndexWriter writer = new IndexWriter(dir, conf);
     writer.AddDocument(new Document());
     writer.Commit(); // trigger flush
     writer.AddDocument(new Document());
     writer.Commit(); // trigger flush
     writer.ForceMerge(1);
     writer.Dispose();
     dir.Dispose();
 }
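 // ReportingMergeScheduler is not shown in this listing; a minimal sketch of an
 // assumed shape (the serial Merge loop mirrors the override shown before
 // Example #43 above; Dispose is left empty because nothing is allocated):
 private class ReportingMergeScheduler : MergeScheduler
 {
     public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
     {
         MergePolicy.OneMerge merge;
         while ((merge = writer.NextMerge) != null)
         {
             Console.WriteLine("executing merge " + merge.SegString(writer.Directory));
             writer.Merge(merge);
         }
     }

     protected override void Dispose(bool disposing)
     {
     }
 }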
Example #58
 protected internal abstract void Publish(IndexWriter writer);
 public RunnableAnonymousInnerClassHelper(TestControlledRealTimeReopenThread outerInstance, SnapshotDeletionPolicy sdp, Directory dir, IndexWriter iw)
 {
     this.outerInstance = outerInstance;
     this.sdp           = sdp;
     this.dir           = dir;
     this.iw            = iw;
 }
Example #60
        /// <summary>
        /// Split a given index into 3 indexes for training, test and cross validation tasks respectively.
        /// </summary>
        /// <param name="originalIndex">an <see cref="AtomicReader"/> on the source index</param>
        /// <param name="trainingIndex">a <see cref="Directory"/> used to write the training index</param>
        /// <param name="testIndex">a <see cref="Directory"/> used to write the test index</param>
        /// <param name="crossValidationIndex">a <see cref="Directory"/> used to write the cross validation index</param>
        /// <param name="analyzer"><see cref="Analyzer"/> used to create the new docs</param>
        /// <param name="fieldNames">names of fields that need to be put in the new indexes, or <c>null</c> if all should be used</param>
        /// <exception cref="IOException">if any writing operation fails on any of the indexes</exception>
        public void Split(AtomicReader originalIndex, Directory trainingIndex, Directory testIndex, Directory crossValidationIndex, Analyzer analyzer, params string[] fieldNames)
        {
            // create IWs for train / test / cv IDXs
            IndexWriter testWriter     = new IndexWriter(testIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer));
            IndexWriter cvWriter       = new IndexWriter(crossValidationIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer));
            IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(Util.LuceneVersion.LUCENE_CURRENT, analyzer));

            try
            {
                int size = originalIndex.MaxDoc;

                IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
                TopDocs       topDocs       = indexSearcher.Search(new MatchAllDocsQuery(), Int32.MaxValue);

                // set the type to be indexed, stored, with term vectors
                FieldType ft = new FieldType(TextField.TYPE_STORED);
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = true;
                ft.StoreTermVectorPositions = true;

                int b = 0;

                // iterate over existing documents
                foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
                {
                    // create a new document for indexing
                    Document doc = new Document();
                    if (fieldNames != null && fieldNames.Length > 0)
                    {
                        foreach (String fieldName in fieldNames)
                        {
                            doc.Add(new Field(fieldName, originalIndex.Document(scoreDoc.Doc).GetField(fieldName).ToString(), ft));
                        }
                    }
                    else
                    {
                        foreach (IndexableField storableField in originalIndex.Document(scoreDoc.Doc).Fields)
                        {
                            if (storableField.ReaderValue != null)
                            {
                                doc.Add(new Field(storableField.Name, storableField.ReaderValue, ft));
                            }
                            else if (storableField.BinaryValue != null)
                            {
                                doc.Add(new Field(storableField.Name, storableField.BinaryValue, ft));
                            }
                            else if (storableField.StringValue != null)
                            {
                                doc.Add(new Field(storableField.Name, storableField.StringValue, ft));
                            }
                            else if (storableField.NumericValue != null)
                            {
                                doc.Add(new Field(storableField.Name, storableField.NumericValue.ToString(), ft));
                            }
                        }
                    }

                    // add it to one of the IDXs
                    if (b % 2 == 0 && testWriter.MaxDoc < size * _testRatio)
                    {
                        testWriter.AddDocument(doc);
                    }
                    else if (cvWriter.MaxDoc < size * _crossValidationRatio)
                    {
                        cvWriter.AddDocument(doc);
                    }
                    else
                    {
                        trainingWriter.AddDocument(doc);
                    }
                    b++;
                }
            }
            catch (Exception e)
            {
                throw new IOException("Exceptio in DatasetSplitter", e);
            }
            finally
            {
                testWriter.Commit();
                cvWriter.Commit();
                trainingWriter.Commit();
                // close IWs
                testWriter.Dispose();
                cvWriter.Dispose();
                trainingWriter.Dispose();
            }
        }
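        // Usage sketch (hypothetical names and ratios; assumes the splitter was
        // constructed with test and cross-validation fractions, e.g.
        // new DatasetSplitter(0.1, 0.1)):
        // splitter.Split(atomicReader, trainingDir, testDir, crossValidationDir,
        //                new StandardAnalyzer(Util.LuceneVersion.LUCENE_CURRENT), "content");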