An IndexWriter creates and maintains an index. The third argument to the constructor determines whether a new index is created or an existing index is opened for the addition of new documents. In either case, documents are added with the AddDocument method, and Close should be called when you are finished adding documents.

If no more documents will be added for a while and optimal search performance is desired, call the Optimize method before the index is closed.

Opening an IndexWriter creates a lock file for the directory in use, so attempting to open another IndexWriter on the same directory results in an IOException. The same exception is thrown if an IndexReader on that directory is used to delete documents from the index.
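As a quick point of reference, here is a minimal sketch of that lifecycle against the legacy Lucene.Net 3.x API used by most of the examples below; the method name, index path, field name, and content are illustrative placeholders rather than parts of any particular example.

        // Minimal lifecycle sketch (assumes the Lucene.Net 3.x API shown in the examples below).
        void CreateAndCloseIndex(string indexPath)
        {
            using (var dir = Lucene.Net.Store.FSDirectory.Open(indexPath))
            using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
            {
                // Third constructor argument: true creates a new index, false opens an existing one for appending.
                var writer = new Lucene.Net.Index.IndexWriter(dir, analyzer, true,
                                                              Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

                var doc = new Lucene.Net.Documents.Document();
                doc.Add(new Lucene.Net.Documents.Field("content", "hello lucene",
                                                       Lucene.Net.Documents.Field.Store.YES,
                                                       Lucene.Net.Documents.Field.Index.ANALYZED));
                writer.AddDocument(doc);

                writer.Optimize(); // optional: merge segments before a read-heavy period
                writer.Close();    // releases the write.lock; a second writer opened on this directory before Close throws
            }
        }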

        public void SetUp()
        {

            var writer = new IndexWriter(store, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

            var doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();
        }
Example 2
        public virtual void  TestNPESpanQuery()
        {
            Directory   dir    = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet <string>()), IndexWriter.MaxFieldLength.LIMITED, null);

            // Add documents
            AddDoc(writer, "1", "the big dogs went running to the market");
            AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");

            // Commit
            writer.Close();

            // Get searcher
            IndexReader   reader   = IndexReader.Open(dir, true, null);
            IndexSearcher searcher = new IndexSearcher(reader);

            // Control (make sure docs indexed)
            Assert.AreEqual(2, HitCount(searcher, "the"));
            Assert.AreEqual(1, HitCount(searcher, "cat"));
            Assert.AreEqual(1, HitCount(searcher, "dogs"));
            Assert.AreEqual(0, HitCount(searcher, "rabbit"));

            // This throws exception (it shouldn't)
            Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10, null).TotalHits);
            reader.Close();
            dir.Close();
        }
Example 3
		private static Directory MakeIndex()
		{
			Directory dir = new RAMDirectory();
			try
			{
				System.Random r = new System.Random((System.Int32) (BASE_SEED + 42));
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(dir, analyzer, true);
				
				writer.SetUseCompoundFile(false);
				
				for (int d = 1; d <= NUM_DOCS; d++)
				{
					Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
					for (int f = 1; f <= NUM_FIELDS; f++)
					{
						doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.TOKENIZED));
					}
					writer.AddDocument(doc);
				}
				writer.Close();
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("", e);
			}
			return dir;
		}
        public virtual void TestLucene()
        {
            int num = 100;

            Directory indexA = NewDirectory();
            Directory indexB = NewDirectory();

            FillIndex(Random(), indexA, 0, num);
            bool fail = VerifyIndex(indexA, 0);
            if (fail)
            {
                Assert.Fail("Index a is invalid");
            }

            FillIndex(Random(), indexB, num, num);
            fail = VerifyIndex(indexB, num);
            if (fail)
            {
                Assert.Fail("Index b is invalid");
            }

            Directory merged = NewDirectory();

            IndexWriter writer = new IndexWriter(merged, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(2)));
            writer.AddIndexes(indexA, indexB);
            writer.ForceMerge(1);
            writer.Dispose();

            fail = VerifyIndex(merged, 0);

            Assert.IsFalse(fail, "The merged index is invalid");
            indexA.Dispose();
            indexB.Dispose();
            merged.Dispose();
        }
        /// <summary>
        /// Set up a new index in RAM with three test phrases and the supplied Analyzer.
        /// </summary>
        /// <exception cref="Exception"> if an error occurs with index writer or searcher </exception>
        public override void SetUp()
        {
            base.SetUp();
            analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), 2);
            directory = NewDirectory();
            IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

            Document doc;
            doc = new Document();
            doc.Add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new TextField("content", "just another test sentence", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
            writer.AddDocument(doc);

            writer.Dispose();

            reader = DirectoryReader.Open(directory);
            searcher = NewSearcher(reader);
        }
        public void SetUp()
        {

            IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);

            Document doc;

            doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();
        }
		//Rolls back index to a chosen ID
		private void  RollBackLast(int id)
		{
			
			// System.out.println("Attempting to rollback to "+id);
			System.String ids = "-" + id;
			IndexCommit last = null;
			IList<IndexCommit> commits = IndexReader.ListCommits(dir);
			foreach (IndexCommit commit in commits)
			{
				System.Collections.Generic.IDictionary<string, string> ud = commit.GetUserData();
				if (ud.Count > 0 && ((System.String) ud["index"]).EndsWith(ids))
					last = commit;
			}
			
			if (last == null)
				throw new System.SystemException("Couldn't find commit point " + id);
			
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), new RollbackDeletionPolicy(this, id), MaxFieldLength.UNLIMITED, last);
            System.Collections.Generic.IDictionary<string, string> data = new System.Collections.Generic.Dictionary<string, string>();
			data["index"] = "Rolled back to 1-" + id;
			w.Commit(data);
			w.Close();
		}
Example 8
        internal virtual void  BuildDir(Directory dir, int nDocs, int maxFields, int maxFieldLen)
        {
            IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            iw.SetMaxBufferedDocs(10);
            for (int j = 0; j < nDocs; j++)
            {
                Document d       = new Document();
                int      nFields = r.Next(maxFields);
                for (int i = 0; i < nFields; i++)
                {
                    int flen = r.Next(maxFieldLen);
                    System.Text.StringBuilder sb = new System.Text.StringBuilder("^ ");
                    while (sb.Length < flen)
                    {
                        sb.Append(' ').Append(words[r.Next(words.Length)]);
                    }
                    sb.Append(" $");
                    Field.Store store = Field.Store.YES;                     // make random later
                    Field.Index index = Field.Index.ANALYZED;                // make random later
                    d.Add(new Field("f" + i, sb.ToString(), store, index));
                }
                iw.AddDocument(d, null);
            }
            iw.Close();
        }
Example 9
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            long         theLong   = System.Int64.MaxValue;
            double       theDouble = System.Double.MaxValue;
            sbyte        theByte   = (sbyte)System.SByte.MaxValue;
            short        theShort  = System.Int16.MaxValue;
            int          theInt    = System.Int32.MaxValue;
            float        theFloat  = System.Single.MaxValue;

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theByte", System.Convert.ToString((sbyte)theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc, null);
            }
            writer.Close();
            reader = IndexReader.Open((Directory)directory, true, null);
        }
Example 10
        private static IndexWriter GetWriter()
        {
            var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
            var writer   = new Lucene.Net.Index.IndexWriter(IndexDir, analyzer);

            return(writer);
        }
Example 11
 public LuceneIndexer()
 {
     luceneIndexDirectory = null;
     writer   = null;
     analyzer = null;
     parser   = null;
 }
Example 12
 public SearchEngine()
 {
     luceneIndexDirectory = null; // Is set in Create Index
     analyzer             = null; // Is set in CreateAnalyser
     writer = null;               // Is set in CreateWriter
     CSVdoc = new CSVDocument();
 }
Example 13
        void Index()
        {
            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, new Lucene.Net.Analysis.WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            Lucene.Net.Documents.Document doc = null;
            Lucene.Net.Documents.Field    f   = null;

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            wr.Close();
        }
Example 14
 protected override void CreateIndex(string indexPath)
 {
     luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
     IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
     writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
     writer.SetSimilarity(similarity);
 }
Example 15
        public void TestEmptyChildFilter()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
            // we don't want to merge - since we rely on certain segment setup
            IndexWriter w = new IndexWriter(dir, config);

            IList<Document> docs = new List<Document>();

            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));
            w.AddDocuments(docs);

            docs.Clear();
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));
            w.AddDocuments(docs);
            w.Commit();
            int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field
            for (int i = 0; i < num; i++)
            {
                docs.Clear();
                docs.Add(MakeJob("java", 2007));
                w.AddDocuments(docs);
            }

            IndexReader r = DirectoryReader.Open(w, Random().NextBoolean());
            w.Dispose();
            assertTrue(r.Leaves.size() > 1);
            IndexSearcher s = new IndexSearcher(r);
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            BooleanQuery fullQuery = new BooleanQuery();
            fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
            s.Search(fullQuery, c);
            TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
            assertFalse(float.IsNaN(results.MaxScore));
            assertEquals(1, results.TotalGroupedHitCount);
            assertEquals(1, results.Groups.Length);
            IGroupDocs<int> group = results.Groups[0];
            Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
            assertEquals("java", childDoc.Get("skill"));
            assertNotNull(group.GroupValue);
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

            r.Dispose();
            dir.Dispose();
        }
Example 16
        public LuceneApp()
        {
            luceneIndexDirectory = null;
            analyzer             = null;
            writer        = null;
            newSimilarity = new NewSimilarity();
            parserFields  = new string[] { DOC_TITLE, DOC_AUTHOR, DOC_BIB, DOC_BODY };
            fieldWeights  = new Dictionary <string, float>();
            foreach (string field in parserFields)
            {
                fieldWeights.Add(field, 1);
            }

            // Init WordNet
            // Src: https://developer.syn.co.in/tutorial/wordnet/tutorial.html
            var directory = "../../../wordnetdic";

            wordNetEngine = new WordNetEngine();

            // data sources
            wordNetEngine.AddDataSource(new StreamReader(Path.Combine(directory, "data.adj")), PartOfSpeech.Adjective);
            wordNetEngine.AddDataSource(new StreamReader(Path.Combine(directory, "data.adv")), PartOfSpeech.Adverb);
            wordNetEngine.AddDataSource(new StreamReader(Path.Combine(directory, "data.noun")), PartOfSpeech.Noun);
            wordNetEngine.AddDataSource(new StreamReader(Path.Combine(directory, "data.verb")), PartOfSpeech.Verb);

            // indexes
            wordNetEngine.AddIndexSource(new StreamReader(Path.Combine(directory, "index.adj")), PartOfSpeech.Adjective);
            wordNetEngine.AddIndexSource(new StreamReader(Path.Combine(directory, "index.adv")), PartOfSpeech.Adverb);
            wordNetEngine.AddIndexSource(new StreamReader(Path.Combine(directory, "index.noun")), PartOfSpeech.Noun);
            wordNetEngine.AddIndexSource(new StreamReader(Path.Combine(directory, "index.verb")), PartOfSpeech.Verb);

            Console.WriteLine("Loading database...");
            wordNetEngine.Load();
            Console.WriteLine("Load completed.");
        }
        private static void IndexIndicator(IndicatorMetadata indicatorMetadata,
            IEnumerable<IndicatorMetadataTextProperty> properties, IndexWriter writer)
        {
            Document doc = new Document();
            doc.Add(new Field("id", indicatorMetadata.IndicatorId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

            var text = indicatorMetadata.Descriptive;

            StringBuilder sb = new StringBuilder();
            foreach (var indicatorMetadataTextProperty in properties)
            {
                var key = indicatorMetadataTextProperty.ColumnName;

                if (text.ContainsKey(key))
                {
                    sb.Append(text[key]);
                    sb.Append(" ");
                }
            }

            doc.Add(new Field("IndicatorText",
                  sb.ToString().ToLower(), Field.Store.NO,
                  Field.Index.ANALYZED));

            writer.AddDocument(doc);
        }
Example 18
 public LuceneAdvancedSearchApplication()
 {
     luceneIndexDirectory = null;
     writer   = null;
     analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English");
     parser   = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
 }
Example 19
        private void btnInitAnalyzer_Click(object sender, EventArgs e)
        {
            using (new BusyObject(this))
            {
                if (analyzer == null)
                {
                    string hspellPath = SelectProjectFolder("Select a path to HSpell data files", "hspell-data-files" + System.IO.Path.DirectorySeparatorChar);
                    if (hspellPath == null)
                        return;

                    MorphAnalyzer a = new MorphAnalyzer(hspellPath);
                    if (!a.IsInitialized)
                    {
                        MessageBox.Show("Error while trying to create a morphological analyzer object; please check the existance of the required data files and try again");
                        return;
                    }

                    analyzer = a;
                }

                // Recreate the index
                IndexWriter writer = new IndexWriter(FSDirectory.Open(tempPath), new Lucene.Net.Analysis.SimpleAnalyzer(), true, new IndexWriter.MaxFieldLength(10));
                writer.Close();
            }

            btnIndexAddFolder.Enabled = true;
            btnRunAutoTests.Enabled = true;
            btnExecuteSearch.Enabled = true;
        }
Example 20
        // Delete the entire index
        public void delAllIndex()
        {
            if (System.IO.Directory.Exists(indexPath) == false)
            {
                System.IO.Directory.CreateDirectory(indexPath);
            }
            FSDirectory fsDirectory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());

            if (!IndexReader.IndexExists(fsDirectory))
            {
                return;
            }
            else
            {
                if (IndexReader.IsLocked(fsDirectory))
                {
                    IndexReader.Unlock(fsDirectory);
                }
            }
            Lucene.Net.Index.IndexWriter iw = new Lucene.Net.Index.IndexWriter(indexPath, new PanGuAnalyzer(), false);
            //  iw.DeleteDocuments(new Lucene.Net.Index.Term("Key", key));
            iw.DeleteAll();
            iw.Optimize(); // Deleted documents are not removed from disk immediately; a .del file is written instead, and Optimize must be called to purge them. Before they are purged, UndeleteAll can restore them.
            iw.Close();
        }
 /// <summary>
 /// Adds a Product to the index as a Document with Name, Origin and Price fields.
 /// </summary>
 /// <param name="p">The product to index.</param>
 /// <param name="writer">The IndexWriter that receives the document.</param>
 private static void AddDocumentToIndex(Product p, IndexWriter writer)
 {
     Document doc = new Document();
     doc.Add(new Field("Name",
                        p.Name,
                        Field.Store.YES,
                        Field.Index.ANALYZED,
                        Lucene.Net.Documents.Field.TermVector.YES
                        )
              );
     doc.Add(new Field("Origin",
                        p.Origin.ToString(),
                        Field.Store.YES,
                        Field.Index.ANALYZED,
                        Lucene.Net.Documents.Field.TermVector.YES
                        )
              );
     doc.Add(new Field("Price",
                        p.Price.ToString(),
                        Field.Store.YES,
                        Field.Index.ANALYZED,
                        Lucene.Net.Documents.Field.TermVector.YES
                        )
              );
     writer.AddDocument(doc);
 }
Example 22
        void Index()
        {
            var conf = new IndexWriterConfig(LuceneVersion.LUCENE_CURRENT, new Lucene.Net.Analysis.Core.WhitespaceAnalyzer(LuceneVersion.LUCENE_CURRENT));

            Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(dir, conf /*new Lucene.Net.Analysis.Core.WhitespaceAnalyzer(LuceneVersion.LUCENE_CURRENT), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED*/);

            Lucene.Net.Documents.Document doc = null;
            Lucene.Net.Documents.Field    f   = null;

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b c d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b a d", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "a b e f", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            doc = new Lucene.Net.Documents.Document();
            f   = new Lucene.Net.Documents.Field("field", "x y z", Lucene.Net.Documents.Field.Store.NO, Lucene.Net.Documents.Field.Index.ANALYZED);
            doc.Add(f);
            wr.AddDocument(doc);

            wr.Dispose();
        }
Example 23
        static void Proc()
        {
            var uri = GetRandomWikiPage();
            queue.Enqueue(uri);
            using (var dir = new Lucene.Net.Store.SimpleFSDirectory(new DirectoryInfo("..\\..\\idx"))) {
                using (var indexWriter = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH))) {
                    while (true) {
                        string page;

                        if (queue.TryDequeue(out page)) {
                            visited.AddOrUpdate(page, true, (p, b) => true);
                            try {
                                ProcessPage(page, indexWriter);
                            }
                            catch (Exception) {
                                Console.WriteLine("ERROR");
                            }
                            if (Console.KeyAvailable) {
                                var x = Console.ReadKey();
                                if (x.Key == ConsoleKey.Spacebar) {

                                    break;
                                }
                            }
                        }
                        else {
                            break;
                        }
                    }
                }
            }
        }
        public virtual void  TestDanish()
        {
            /* build an index */
            RAMDirectory danishIndex = new RAMDirectory();
            IndexWriter  writer      = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);

            // Danish collation orders the words below in the given order
            // (example taken from TestSort.testInternationalSort() ).
            System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
            for (int docnum = 0; docnum < words.Length; ++docnum)
            {
                Document doc = new Document();
                doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc, null);
            }
            writer.Optimize(null);
            writer.Close();

            IndexReader   reader = IndexReader.Open((Directory)danishIndex, true, null);
            IndexSearcher search = new IndexSearcher(reader);

            System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("da" + "-" + "dk").CompareInfo;

            // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
            // but Danish collation does.
            ScoreDoc[] result = search.Search(Csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
            AssertEquals("The index Term should be included.", 1, result.Length);

            result = search.Search(Csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
            AssertEquals("The index Term should not be included.", 0, result.Length);
            search.Close();
        }
		public virtual void  TestSorting()
		{
			Directory directory = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetMaxBufferedDocs(2);
			writer.SetMergeFactor(1000);
			writer.AddDocument(Adoc(new System.String[]{"id", "a", "title", "ipod", "str_s", "a"}));
			writer.AddDocument(Adoc(new System.String[]{"id", "b", "title", "ipod ipod", "str_s", "b"}));
			writer.AddDocument(Adoc(new System.String[]{"id", "c", "title", "ipod ipod ipod", "str_s", "c"}));
			writer.AddDocument(Adoc(new System.String[]{"id", "x", "title", "boosted", "str_s", "x"}));
			writer.AddDocument(Adoc(new System.String[]{"id", "y", "title", "boosted boosted", "str_s", "y"}));
			writer.AddDocument(Adoc(new System.String[]{"id", "z", "title", "boosted boosted boosted", "str_s", "z"}));
			
			IndexReader r = writer.GetReader();
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(r);
			
			RunTest(searcher, true);
			RunTest(searcher, false);
			
			searcher.Close();
			r.Close();
			directory.Close();
		}
Example 26
        public virtual void  TestFarsi()
        {
            /* build an index */
            RAMDirectory farsiIndex = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);
            Document     doc        = new Document();

            doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc, null);

            writer.Optimize(null);
            writer.Close();

            IndexReader   reader = IndexReader.Open((Directory)farsiIndex, true, null);
            IndexSearcher search = new IndexSearcher(reader);
            Query         q      = new TermQuery(new Term("body", "body"));

            // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
            // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
            // characters properly.
            System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;

            // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
            // orders the U+0698 character before the U+0633 character, so the single
            // index Term below should NOT be returned by a TermRangeFilter with a Farsi
            // Collator (or an Arabic one for the case when Farsi is not supported).
            int numHits = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000, null).TotalHits;

            Assert.AreEqual(0, numHits, "The index Term should not be included.");

            numHits = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000, null).TotalHits;
            Assert.AreEqual(1, numHits, "The index Term should be included.");
            search.Close();
        }
Example 27
        private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
        {
            RAMDirectory d = new RAMDirectory();
            IndexWriter  w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null);

            for (int i = 0; i < numDeletedDocs; i++)
            {
                w.AddDocument(new Document(), null);
            }
            w.Commit(null);
            w.DeleteDocuments(null, new MatchAllDocsQuery());
            w.Commit(null);

            if (0 < numDeletedDocs)
            {
                Assert.IsTrue(w.HasDeletions(null), "writer has no deletions");
            }

            Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs");
            Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs");
            w.Close();
            IndexReader r = IndexReader.Open((Directory)d, true, null);

            Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs");
            r.Close();
            return(d);
        }
 /// <summary>
 /// Creates the index at indexPath
 /// </summary>
 /// <param name="indexPath">Directory path to create the index</param>
 public void CreateIndex(string indexPath)
 {
     luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
     analyzer             = new Lucene.Net.Analysis.SimpleAnalyzer();
     IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
     writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
 }
Example 29
        public LuceneIndexSearch(string analyzer_str)
        {
            luceneIndexDirectory = null;
            writer = null;
            switch (analyzer_str)
            {
            case "Simple Analyzer":
                analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
                Console.WriteLine("Simple Analyzer");
                break;

            case "Standard Analyzer":
                analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                Console.WriteLine("Standard Analyzer");
                break;

            case "Snowball Analyzer":
                // SnowballAnalyzer's second var "name" is the language of stemmer
                analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, "English");
                Console.WriteLine("Snowball Analyzer");
                break;

            default:
                analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
                break;
            }

            parser       = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN_PASS_TEXT, analyzer);
            mySimilarity = new NewSimilarity();
        }
Example 30
        public virtual void  TestCompressionTools()
        {
            IFieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES);
            IFieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES);

            Document doc = new Document();

            doc.Add(binaryFldCompressed);
            doc.Add(stringFldCompressed);

            /* add the doc to a ram index */
            MockRAMDirectory dir    = new MockRAMDirectory();
            IndexWriter      writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);

            writer.AddDocument(doc, null);
            writer.Close();

            /* open a reader and fetch the document */
            IndexReader reader        = IndexReader.Open((Directory)dir, false, null);
            Document    docFromReader = reader.Document(0, null);

            Assert.IsTrue(docFromReader != null);

            /* fetch the binary compressed field and compare its content with the original one */
            System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed", null))));
            Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed));
            Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed", null)).Equals(binaryValCompressed));

            reader.Close();
            dir.Close();
        }
Example 31
		private void AddData(IndexWriter writer)
		{
			AddPoint(writer, "McCormick &amp; Schmick's Seafood Restaurant", 38.9579000, -77.3572000);
			AddPoint(writer, "Jimmy's Old Town Tavern", 38.9690000, -77.3862000);
			AddPoint(writer, "Ned Devine's", 38.9510000, -77.4107000);
			AddPoint(writer, "Old Brogue Irish Pub", 38.9955000, -77.2884000);
			AddPoint(writer, "Alf Laylah Wa Laylah", 38.8956000, -77.4258000);
			AddPoint(writer, "Sully's Restaurant &amp; Supper", 38.9003000, -77.4467000);
			AddPoint(writer, "TGI Friday", 38.8725000, -77.3829000);
			AddPoint(writer, "Potomac Swing Dance Club", 38.9027000, -77.2639000);
			AddPoint(writer, "White Tiger Restaurant", 38.9027000, -77.2638000);
			AddPoint(writer, "Jammin' Java", 38.9039000, -77.2622000);
			AddPoint(writer, "Potomac Swing Dance Club", 38.9027000, -77.2639000);
			AddPoint(writer, "WiseAcres Comedy Club", 38.9248000, -77.2344000);
			AddPoint(writer, "Glen Echo Spanish Ballroom", 38.9691000, -77.1400000);
			AddPoint(writer, "Whitlow's on Wilson", 38.8889000, -77.0926000);
			AddPoint(writer, "Iota Club and Cafe", 38.8890000, -77.0923000);
			AddPoint(writer, "Hilton Washington Embassy Row", 38.9103000, -77.0451000);
			AddPoint(writer, "HorseFeathers, Bar & Grill", 39.01220000000001, -77.3942);
			AddPoint(writer, "Marshall Island Airfield", 7.06, 171.2);
			AddPoint(writer, "Midway Island", 25.7, -171.7);
			AddPoint(writer, "North Pole Way", 55.0, 4.0);

			writer.Commit();
			writer.Close();
		}
Example 32
        public virtual void  TestRAMDirectorySize()
        {
            Directory        dir    = FSDirectory.Open(indexDir);
            MockRAMDirectory ramDir = new MockRAMDirectory(dir);

            dir.Close();
            IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED, null);

            writer.Optimize(null);

            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

            ThreadClass[] threads = new ThreadClass[numThreads];
            for (int i = 0; i < numThreads; i++)
            {
                int num = i;
                threads[i] = new AnonymousClassThread(num, writer, ramDir, this);
            }
            for (int i = 0; i < numThreads; i++)
            {
                threads[i].Start();
            }
            for (int i = 0; i < numThreads; i++)
            {
                threads[i].Join();
            }

            writer.Optimize(null);
            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

            writer.Close();
        }
Example 33
 /// <summary>
 /// Create an index document
 /// </summary>
 /// <param name="dic"></param>
 public void AddLuceneIndex(Dictionary<string, string> dic) {
     //var analyzer = new StandardAnalyzer(Version.LUCENE_30);
     var analyzer = GetAnalyzer();
     using (var directory = GetLuceneDirectory())
     using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) {
         var doc = new Document();
         foreach (KeyValuePair<string, string> pair in dic) {
             // add new index entry
             // Field.Store.YES: controls whether the original value is stored.
             // Only values stored with Field.Store.YES can later be retrieved with doc.Get("number").
             // Field.Index.NOT_ANALYZED: index the value without tokenizing it.
             //todo:boost
             if (NotAnalyzeFields.Exists(one => one == pair.Key)) {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.NOT_ANALYZED));
             }
             else {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.ANALYZED));
             }
         }
         //doc.Boost
         writer.AddDocument(doc);
         writer.Commit();
         writer.Optimize();
         analyzer.Close();
     }
 }
Example 34
        public override void  SetUp()
        {
            base.SetUp();
            System.String tempDir = System.IO.Path.GetTempPath();
            if (tempDir == null)
            {
                throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
            }
            indexDir = new System.IO.DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex"));

            Directory   dir    = FSDirectory.Open(indexDir);
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            // add some documents
            Document doc = null;

            for (int i = 0; i < docsToAdd; i++)
            {
                doc = new Document();
                doc.Add(new Field("content", English.IntToEnglish(i).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc, null);
            }
            Assert.AreEqual(docsToAdd, writer.MaxDoc());
            writer.Close();
            dir.Close();
        }
Example 35
 private static void AddTextToIndex(int txts, string text, IndexWriter writer)
 {
     Document doc = new Document();
     doc.Add(new Field("id", txts.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
     doc.Add(new Field("postBody", text, Field.Store.YES, Field.Index.TOKENIZED));
     writer.AddDocument(doc);
 }
        // Creates index based on selection of analyzer
        public void CreateIndex(string indexPath, string name)
        {
            luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
            IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
            if (name == "WhitespaceAnalyzer")
            {
                analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            }
            if (name == "SimpleAnalyzer")
            {
                analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
            }

            if (name == "StandardAnalyzer")
            {
                analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
            }


            if (name == "StopAnalyzer")
            {
                analyzer = new Lucene.Net.Analysis.StopAnalyzer(VERSION);
            }
            else
            {
                writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
            }

            writer.SetSimilarity(customSimilarity);
        }
Example 37
		private Directory MakeIndex()
		{
			Directory dir = new RAMDirectory();
			try
			{
				System.Random r = NewRandom();
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
				
				writer.SetUseCompoundFile(false);
				
				for (int d = 1; d <= NUM_DOCS; d++)
				{
					Document doc = new Document();
					for (int f = 1; f <= NUM_FIELDS; f++)
					{
						doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED));
					}
					writer.AddDocument(doc);
				}
				writer.Close();
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("", e);
			}
			return dir;
		}
Example 38
        private void  Add(System.String s, IndexWriter writer)
        {
            Document doc = new Document();

            doc.Add(new Field("body", s, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc, null);
        }
Example 39
        /// <summary>
        /// Remove text from the existing index.
        /// </summary>
        /// <param name="directoryIndexInfo">The directory infomation where the index files are located.</param>
        /// <param name="names">An array of unique names for the text.</param>
        public void RemoveText(DirectoryInfo directoryIndexInfo, string[] names)
        {
            Lucene.Net.Index.IndexWriter writer    = null;
            Lucene.Net.Store.Directory   directory = null;

            try
            {
                // If exists.
                if (names != null && names.Length > 0)
                {
                    // Create the analyzer.
                    SimpleAnalyzer   simpleAnalyzer   = new Analyzer.SimpleAnalyzer();
                    StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

                    // Create the index writer.
                    directory = FSDirectory.Open(directoryIndexInfo);
                    IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
                    indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);

                    // Open existing or create new.
                    writer = new IndexWriter(directory, indexConfig);

                    // Create the query.
                    List <Query> queries = new List <Query>();

                    // For each name.
                    foreach (string name in names)
                    {
                        // Create the query.
                        BooleanQuery query = new BooleanQuery();
                        query.Add(new TermQuery(new Term("textname", name.ToLower())), BooleanClause.Occur.MUST);

                        // Add the query.
                        queries.Add(query);
                    }

                    // Delete the text.
                    writer.DeleteDocuments(queries.ToArray());

                    // Commit the index.
                    writer.Commit();
                }
            }
            catch (Exception)
            {
                throw;
            }
            finally
            {
                if (writer != null)
                {
                    writer.Dispose();
                }

                if (directory != null)
                {
                    directory.Dispose();
                }
            }
        }
Example 40
        public virtual void  TestBooleanQueryContainingSingleTermPrefixQuery()
        {
            // this tests against bug 33161 (now fixed)
            // In order to cause the bug, the outer query must have more than one term
            // and all terms required.
            // The contained PhraseMultiQuery must contain exactly one term array.

            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            Add("blueberry pie", writer);
            Add("blueberry chewing gum", writer);
            Add("blue raspberry pie", writer);
            writer.Optimize(null);
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore, true, null);
            // This query will be equivalent to +body:pie +body:"blue*"
            BooleanQuery q = new BooleanQuery();

            q.Add(new TermQuery(new Term("body", "pie")), Occur.MUST);

            MultiPhraseQuery trouble = new MultiPhraseQuery();

            trouble.Add(new Term[] { new Term("body", "blueberry"), new Term("body", "blue") });
            q.Add(trouble, Occur.MUST);

            // exception will be thrown here without fix
            ScoreDoc[] hits = searcher.Search(q, null, 1000, null).ScoreDocs;

            Assert.AreEqual(2, hits.Length, "Wrong number of hits");
            searcher.Close();
        }
        //END
        // This method creates a document from a FileToIndex object
        public void BuildIndex(FileToIndex file)
        {
            using (var analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Version.LUCENE_30))
            {
                using (IndexWriter idxw = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    // Delete any existing document with the same Id before re-indexing

                    var searchQuery = new TermQuery(new Term("Id", file.Id.ToString()));
                    idxw.DeleteDocuments(searchQuery);
                    //creation
                    Document doc = new Document();
                    doc.Add(new Field("Id", file.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));//аналайзер разбивает строки на слова
                    doc.Add(new Field("Title", file.Title, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Description", file.Description, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Authors", file.Authors, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Text", file.Text, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Hashtags", file.Hashtags, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Discipline", file.Discipline, Field.Store.YES, Field.Index.ANALYZED));
                    //write the document to the index
                    idxw.AddDocument(doc);
                    //optimize and close the writer
                    idxw.Commit();

                    idxw.Optimize();

                }
            }
        }
Example 42
        public virtual void  TestPhrasePrefixWithBooleanQuery()
        {
            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(indexStore, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet <string>()), true, IndexWriter.MaxFieldLength.LIMITED, null);

            Add("This is a test", "object", writer);
            Add("a note", "note", writer);
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore, true, null);

            // This query will be equivalent to +type:note +body:"a t*"
            BooleanQuery q = new BooleanQuery();

            q.Add(new TermQuery(new Term("type", "note")), Occur.MUST);

            MultiPhraseQuery trouble = new MultiPhraseQuery();

            trouble.Add(new Term("body", "a"));
            trouble.Add(new Term[] { new Term("body", "test"), new Term("body", "this") });
            q.Add(trouble, Occur.MUST);

            // exception will be thrown here without fix for #35626:
            ScoreDoc[] hits = searcher.Search(q, null, 1000, null).ScoreDocs;
            Assert.AreEqual(0, hits.Length, "Wrong number of hits");
            searcher.Close();
        }
Example 43
 public override void  SetUp()
 {
     base.SetUp();
     IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
     //writer.setUseCompoundFile(true);
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++)
     {
         Document doc = new Document();
         Field.TermVector termVector;
         int mod3 = i % 3;
         int mod2 = i % 2;
         if (mod2 == 0 && mod3 == 0)
         {
             termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
         }
         else if (mod2 == 0)
         {
             termVector = Field.TermVector.WITH_POSITIONS;
         }
         else if (mod3 == 0)
         {
             termVector = Field.TermVector.WITH_OFFSETS;
         }
         else
         {
             termVector = Field.TermVector.YES;
         }
         doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, termVector));
         writer.AddDocument(doc);
     }
     writer.Close();
     searcher = new IndexSearcher(directory, true);
 }
Example 44
        public void CreateWriter()
        {
            IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

            // Create the Lucene writer
            writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
        }
		public virtual void  TestTermEnum()
		{
			IndexWriter writer = null;
			
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			
			// add 100 documents with term : aaa
			// add 100 documents with terms: aaa bbb
			// Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100
			for (int i = 0; i < 100; i++)
			{
				AddDoc(writer, "aaa");
				AddDoc(writer, "aaa bbb");
			}
			
			writer.Close();
			
			// verify document frequency of terms in an unoptimized index
			VerifyDocFreq();
			
			// merge segments by optimizing the index
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
			writer.Optimize();
			writer.Close();
			
			// verify document frequency of terms in an optimized index
			VerifyDocFreq();
		}
 public LuceneInteractive()
 {
     luceneIndexDirectory = null;
     writer   = null;
     analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
     parser   = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
 }
Example 47
		public virtual void  TestSimpleSkip()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Term term = new Term("test", "a");
			for (int i = 0; i < 5000; i++)
			{
				Document d1 = new Document();
				d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(d1);
			}
			writer.Flush();
			writer.Optimize();
			writer.Close();
			
			IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
			SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
			tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);
			
			for (int i = 0; i < 2; i++)
			{
				counter = 0;
				tp.Seek(term);
				
				CheckSkipTo(tp, 14, 185); // no skips
				CheckSkipTo(tp, 17, 190); // one skip on level 0
				CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
				
				// this test would fail if we had only one skip level,
				// because then more bytes would be read from the freqStream
				CheckSkipTo(tp, 4800, 250); // one skip on level 2
			}
		}
Example 48
        public void IndexFile(string filePath)
        {
            PropertyDescriptors descriptors = new PropertyDescriptors();
            descriptors.LoadData(System.Windows.Forms.Application.StartupPath + "\\PropertyDescriptors.xml");
            Analyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
            bool create = !(System.IO.Directory.Exists(_idxDir) && IndexReader.IndexExists(_idxDir));
            IndexWriter iw = new IndexWriter(_idxDir, a, create);
            iw.SetUseCompoundFile(true);

            AdDataStream adStream = new AdDataStream(filePath);
            adStream.LoadData();
            foreach (Advert ad in adStream.FetchAd())
            {
                Document doc = new Document();
                foreach (string s in ad.GetDictionary().Keys)
                {
                    string temp = descriptors.GetIndexableFormat(descriptors[s], ad[s]);
                    doc.Add(Field.Text(s, temp));

                }
                iw.AddDocument(doc);
                if (_updateCallback != null)
                {
                    _updateCallback("Added Document: " + ad["Title"]);

                }
            }
            iw.Optimize();
            iw.Close();
        }
Example 49
        public void CreateSearchIndex()
        {
            directory = new RAMDirectory();
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
            var ixw = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            LookupTable = new Dictionary<string, BaseContent>();
            foreach (BaseContent p in Service.PoIs.ToList())
            {
                var document = new Document();
                document.Add(new Field("id", p.Id.ToString(), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
                string all = p.Name + " ";
                foreach (MetaInfo mi in p.EffectiveMetaInfo)
                {
                    string value;
                    if (mi.Type != MetaTypes.text || !p.Labels.TryGetValue(mi.Label, out value)) continue;
                    document.Add(new Field(mi.Label, value, Field.Store.YES, Field.Index.ANALYZED));
                    all += value + " ";
                }
                document.Add(new Field("All", all, Field.Store.YES, Field.Index.ANALYZED));

                LookupTable[p.Id.ToString()] = p;
                ixw.AddDocument(document);
            }
            ixw.Commit();
        }
		public virtual void  TestMultiValueSource()
		{
			Directory dir = new MockRAMDirectory();
			IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
			Document doc = new Document();
			Field f = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
			doc.Add(f);
			
			for (int i = 0; i < 17; i++)
			{
				f.SetValue("" + i);
				w.AddDocument(doc);
				w.Commit();
			}
			
			IndexReader r = w.GetReader();
			w.Close();
			
			Assert.IsTrue(r.GetSequentialSubReaders().Length > 1);
			
			ValueSource s1 = new IntFieldSource("field");
			DocValues v1 = s1.GetValues(r);
			DocValues v2 = new MultiValueSource(s1).GetValues(r);
			
			for (int i = 0; i < r.MaxDoc(); i++)
			{
				Assert.AreEqual(v1.IntVal(i), i);
				Assert.AreEqual(v2.IntVal(i), i);
			}
			
			Lucene.Net.Search.FieldCache_Fields.DEFAULT.PurgeAllCaches();
			
			r.Close();
			dir.Close();
		}
        public override void  SetUp()
        {
            base.SetUp();


            System.String[] data = new System.String[] { "A 1 2 3 4 5 6", "Z       4 5 6", null, "B   2   4 5 6", "Y     3   5 6", null, "C     3     6", "X       4 5 6" };

            index = new RAMDirectory();
            IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int i = 0; i < data.Length; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id",String.valueOf(i)));
                doc.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED));                     //Field.Keyword("all","all"));
                if (null != data[i])
                {
                    doc.Add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED));                     //Field.Text("data",data[i]));
                }
                writer.AddDocument(doc, null);
            }

            writer.Optimize(null);
            writer.Close();

            r = IndexReader.Open(index, true, null);
            s = new IndexSearcher(r);

            //System.out.println("Set up " + getName());
        }
        /// <summary>
        /// Indexes one document and commits, then prepares the directory for crashing:
        /// one more document is indexed, and upon that commit the creation of
        /// segments_2 will crash.
        /// </summary>
        private void IndexAndCrashOnCreateOutputSegments2()
        {
            Directory realDirectory = FSDirectory.Open(Path);
            CrashAfterCreateOutput crashAfterCreateOutput = new CrashAfterCreateOutput(realDirectory);

            // NOTE: cannot use RandomIndexWriter because it
            // sometimes commits:
            IndexWriter indexWriter = new IndexWriter(crashAfterCreateOutput, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            indexWriter.AddDocument(Document);
            // writes segments_1:
            indexWriter.Commit();

            crashAfterCreateOutput.GetCrashAfterCreateOutput = "segments_2";
            indexWriter.AddDocument(Document);
            try
            {
                // tries to write segments_2 but hits fake exc:
                indexWriter.Commit();
                Assert.Fail("should have hit CrashingException");
            }
            catch (CrashingException e)
            {
                // expected
            }
            // writes segments_3
            indexWriter.Dispose();
            Assert.IsFalse(SlowFileExists(realDirectory, "segments_2"));
            crashAfterCreateOutput.Dispose();
        }
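
The CrashAfterCreateOutput and CrashingException helpers referenced above are not part of this listing. As a sketch only, assuming Lucene.NET 4.x's FilterDirectory base class and mirroring the member names used in the test, they could look roughly like this:

        // Hypothetical reconstruction of the helpers used above; not the original code.
        private class CrashingException : System.Exception
        {
            public CrashingException(string message) : base(message) { }
        }

        // Delegates all calls to the wrapped Directory, except that creating the
        // configured file name throws, simulating a crash mid-commit.
        private class CrashAfterCreateOutput : FilterDirectory
        {
            private string crashAfterCreateOutput;

            public CrashAfterCreateOutput(Directory dir) : base(dir) { }

            // File whose creation should trigger the simulated crash
            // (set to "segments_2" in the test above).
            public string GetCrashAfterCreateOutput
            {
                set { crashAfterCreateOutput = value; }
            }

            public override IndexOutput CreateOutput(string name, IOContext context)
            {
                if (name.Equals(crashAfterCreateOutput))
                {
                    throw new CrashingException("now crashing while creating " + name);
                }
                return base.CreateOutput(name, context);
            }
        }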
Esempio n. 53
        public void TestReadersWriters()
        {
            Directory dir;
            
            using (dir = new RAMDirectory())
            {
                Document doc;
                IndexWriter writer;
                IndexReader reader;

                using (writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    Field field = new Field("name", "value", Field.Store.YES, Field.Index.ANALYZED);
                    doc = new Document();
                    doc.Add(field);
                    writer.AddDocument(doc);
                    writer.Commit();

                    using (reader = writer.GetReader())
                    {
                        IndexReader r1 = reader.Reopen();
                    }

                    Assert.Throws<AlreadyClosedException>(() => reader.Reopen(), "IndexReader shouldn't be open here");
                }
                
                Assert.Throws<AlreadyClosedException>(() => writer.AddDocument(doc), "IndexWriter shouldn't be open here");

                Assert.IsTrue(dir.isOpen_ForNUnit, "RAMDirectory");
            }
            Assert.IsFalse(dir.isOpen_ForNUnit, "RAMDirectory");
        }
Esempio n. 54
        public void MrsJones()
        {
            using (var dir = new RAMDirectory())
            using (var analyzer = new LowerCaseKeywordAnalyzer())
            {
                using (var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    var document = new Lucene.Net.Documents.Document();
                    document.Add(new Field("Name", "MRS. SHABA", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
                    writer.AddDocument(document);
                }

                var searcher = new IndexSearcher(dir, true);

                var termEnum = searcher.IndexReader.Terms();
                while (termEnum.Next())
                {
                    var buffer = termEnum.Term.Text;
                    Console.WriteLine(buffer);
                }

                var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
                var query = queryParser.Parse("Name:\"MRS. S*\"");
                Console.WriteLine(query);
                var result = searcher.Search(query, 10);

                Assert.NotEqual(0, result.TotalHits);
            }
        }
        public virtual void TestAddBinaryTwice()
        {
            Analyzer analyzer = new MockAnalyzer(Random());

            Directory directory = NewDirectory();
            // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy());
            IndexWriter iwriter = new IndexWriter(directory, iwc);
            Document doc = new Document();
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
            doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
            try
            {
                iwriter.AddDocument(doc);
                Assert.Fail("didn't hit expected exception");
            }
            catch (System.ArgumentException expected)
            {
                // expected
            }

            iwriter.Dispose();
            directory.Dispose();
        }
        public virtual void TestSimpleSkip()
        {
            Directory dir = new CountingRAMDirectory(this, new RAMDirectory());
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())).SetMergePolicy(NewLogMergePolicy()));
            Term term = new Term("test", "a");
            for (int i = 0; i < 5000; i++)
            {
                Document d1 = new Document();
                d1.Add(NewTextField(term.Field(), term.Text(), Field.Store.NO));
                writer.AddDocument(d1);
            }
            writer.Commit();
            writer.ForceMerge(1);
            writer.Dispose();

            AtomicReader reader = GetOnlySegmentReader(DirectoryReader.Open(dir));

            for (int i = 0; i < 2; i++)
            {
                Counter = 0;
                DocsAndPositionsEnum tp = reader.TermPositionsEnum(term);
                CheckSkipTo(tp, 14, 185); // no skips
                CheckSkipTo(tp, 17, 190); // one skip on level 0
                CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

                // this test would fail if we had only one skip level,
                // because then more bytes would be read from the freqStream
                CheckSkipTo(tp, 4800, 250); // one skip on level 2
            }
        }
Esempio n. 57
        public void testMissingTerms()
        {
            String fieldName = "field1";
            Directory rd = new RAMDirectory();
            var w = new IndexWriter(rd, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
            for (int i = 0; i < 100; i++)
            {
                var doc = new Document();
                int term = i*10; //terms are units of 10;
                doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.ANALYZED));
                w.AddDocument(doc);
            }
            IndexReader reader = w.GetReader();
            w.Close();

            TermsFilter tf = new TermsFilter();
            tf.AddTerm(new Term(fieldName, "19"));
            FixedBitSet bits = (FixedBitSet) tf.GetDocIdSet(reader);
            Assert.AreEqual(0, bits.Cardinality(), "Must match nothing");

            tf.AddTerm(new Term(fieldName, "20"));
            bits = (FixedBitSet) tf.GetDocIdSet(reader);
            Assert.AreEqual(1, bits.Cardinality(), "Must match 1");

            tf.AddTerm(new Term(fieldName, "10"));
            bits = (FixedBitSet) tf.GetDocIdSet(reader);
            Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

            tf.AddTerm(new Term(fieldName, "00"));
            bits = (FixedBitSet) tf.GetDocIdSet(reader);
            Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

            reader.Close();
            rd.Close();
        }
Esempio n. 58
        public void CreateIndex(Analyzer analyzer)
        {
            FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
            IndexWriter indexWriter = new IndexWriter(fsDir, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            string[] files = System.IO.Directory.GetFiles(_textFilesFolder, Config.FileSearchPattern, SearchOption.AllDirectories);
            foreach (string file in files)
            {
                string name = new FileInfo(file).Name;
                string content = File.ReadAllText(file);

                Document doc = new Document();
                doc.Add(new Field(Config.Field_Path, file, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(Config.Field_Name, name, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Config.Field_Content, content, Field.Store.NO, Field.Index.ANALYZED));

                indexWriter.AddDocument(doc);

                Console.WriteLine("{0} - {1}", file, name);
            }

            indexWriter.Optimize();
            indexWriter.Dispose();

            Console.WriteLine("File count: {0}", files.Length);
        }
Esempio n. 59
 /// <summary>
 /// Creates the index at a given path
 /// </summary>
 /// <param name="indexPath">The path at which to create the index</param>
 public void CreateIndex(string indexPath)
 {
     luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath + "/IndexStoredPosition");
     IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
     writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
     writer.SetSimilarity(mySimilarity);
 }
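
CreateIndex above only opens the writer. A hedged continuation, assuming the writer field initialised there and using illustrative document field names of my own ("Title", "Body"), might add documents and release the write lock like this:

 // Hypothetical continuation; the document field names are assumptions.
 public void AddTextDocument(string title, string body)
 {
     var doc = new Document();
     doc.Add(new Field("Title", title, Field.Store.YES, Field.Index.ANALYZED));
     doc.Add(new Field("Body", body, Field.Store.NO, Field.Index.ANALYZED));
     writer.AddDocument(doc);
 }

 public void CloseIndex()
 {
     writer.Optimize();  // optional merge for faster searches on a static index
     writer.Dispose();   // commits and releases the write lock
 }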
Esempio n. 60
 public void Searcher(string path)
 {
     luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(path);
     writer   = null;
     analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
     parser   = new QueryParser(VERSION, TEXT, analyzer);
 }
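
Searcher only wires up the directory, analyzer and query parser. A minimal usage sketch, assuming the fields shown above and the Lucene.NET 3.x IndexSearcher API, might run a query like this:

 // Hypothetical usage of the fields initialised in Searcher(); not part of the original example.
 public TopDocs RunQuery(string queryText, int maxHits)
 {
     // Open a read-only searcher over the directory opened in Searcher().
     using (var searcher = new IndexSearcher(luceneIndexDirectory, true))
     {
         Query query = parser.Parse(queryText);
         return searcher.Search(query, maxHits);
     }
 }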