/// <summary>
        /// Deletes every document from the index, but only when the index directory
        /// currently contains at least one file.
        /// </summary>
        /// <remarks>
        /// Exceptions raised while opening or clearing the index propagate to the caller unchanged.
        /// <c>ForceUnlockIndex</c> runs only after the writer has been disposed without error.
        /// </remarks>
        public void ClearIndex()
        {
            // Nothing to clear when the index directory holds no files.
            if (!System.IO.Directory.GetFiles(this.index.Directory.FullName).Any())
            {
                return;
            }

            // Both resources are owned by using blocks so they are released even when the
            // writer cannot be opened or DeleteAll fails; no manual Close/Dispose is needed.
            using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
            using (var writer = new IndexWriter(this.index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // Remove older index entries; pending changes are written when the writer is disposed.
                writer.DeleteAll();
            }

            ForceUnlockIndex();
        }
Example #2
0
		/// <summary>
		/// Indexes a single document into an in-memory directory, searches it for the
		/// term "text", and checks that exactly one hit comes back with the stored text.
		/// </summary>
		public virtual void  TestDemo_Renamed_Method()
		{
			const System.String fieldName = "fieldname";
			const System.String indexedText = "This is the text to be indexed.";

			Analyzer standardAnalyzer = new StandardAnalyzer();

			// The index lives in memory; an FSDirectory could be substituted to keep it on disk.
			Directory ramStore = new RAMDirectory();

			// Write one document holding the sample text.
			IndexWriter indexWriter = new IndexWriter(ramStore, standardAnalyzer, true);
			indexWriter.SetMaxFieldLength(25000);
			Document indexedDocument = new Document();
			indexedDocument.Add(new Field(fieldName, indexedText, Field.Store.YES, Field.Index.TOKENIZED));
			indexWriter.AddDocument(indexedDocument);
			indexWriter.Close();

			// Search the freshly written index for the word "text".
			IndexSearcher indexSearcher = new IndexSearcher(ramStore);
			Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser(fieldName, standardAnalyzer);
			Query textQuery = queryParser.Parse("text");
			Hits matches = indexSearcher.Search(textQuery);
			Assert.AreEqual(1, matches.Length());

			// Every hit must return the stored field value unchanged.
			int position = 0;
			while (position < matches.Length())
			{
				Document matchedDocument = matches.Doc(position);
				Assert.AreEqual(indexedText, matchedDocument.Get(fieldName));
				position++;
			}

			indexSearcher.Close();
			ramStore.Close();
		}
        /// <summary>
        /// Deletes every document from the index, but only when the index directory
        /// currently contains at least one file.
        /// </summary>
        /// <remarks>
        /// Exceptions raised while opening or clearing the index propagate to the caller unchanged.
        /// <c>ForceUnlockIndex</c> runs only after the writer has been disposed without error.
        /// </remarks>
        public void ClearIndex()
        {
            // Nothing to clear when the index directory holds no files.
            if (!System.IO.Directory.GetFiles(this.index.Directory.FullName).Any())
            {
                return;
            }

            // Both resources are owned by using blocks so they are released even when the
            // writer cannot be opened or DeleteAll fails; no manual Close/Dispose is needed.
            using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
            using (var writer = new IndexWriter(this.index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // Remove older index entries; pending changes are written when the writer is disposed.
                writer.DeleteAll();
            }

            ForceUnlockIndex();
        }
Example #4
0
        /// <summary>
        /// Creates an <see cref="IndexWriter"/> over <c>IndexDir</c> that uses a default-constructed
        /// <c>StandardAnalyzer</c>.
        /// </summary>
        /// <returns>The newly created writer; the caller is responsible for closing it.</returns>
        private static IndexWriter GetWriter()
        {
            Lucene.Net.Analysis.Standard.StandardAnalyzer standardAnalyzer =
                new Lucene.Net.Analysis.Standard.StandardAnalyzer();

            return new Lucene.Net.Index.IndexWriter(IndexDir, standardAnalyzer);
        }
Example #5
0
        /// <summary>
        /// Parses <paramref name="searchString"/> against the "Content" field and returns up to
        /// 100 scored hits from the index stored in <c>directory</c>.
        /// </summary>
        /// <param name="searchString">Query text in Lucene query-parser syntax.</param>
        /// <returns>
        /// One <see cref="SearchResult"/> per hit, carrying the document's "BookId" field parsed
        /// as a <see cref="Guid"/> and the hit's score. Empty when nothing matches.
        /// </returns>
        public SearchResult[] Search(string searchString)
        {
            // The analyzer, reader and searcher are all disposable; release them on every path so
            // repeated searches do not accumulate open index files.
            using (Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_29))
            {
                QueryParser parser = new QueryParser(Version.LUCENE_29, "Content", analyzer);
                var query = parser.Parse(searchString);

                // A searcher built from an existing reader is not expected to close that reader
                // itself, so the reader gets its own using block.
                using (Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(directory, true))
                using (Searcher searcher = new IndexSearcher(reader))
                {
                    TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);
                    searcher.Search(query, collector);
                    var hits = collector.TopDocs().ScoreDocs;

                    List<SearchResult> results = new List<SearchResult>(hits.Length);

                    for (int i = 0; i < hits.Length; i++)
                    {
                        Lucene.Net.Documents.Document doc = searcher.Doc(hits[i].Doc);

                        results.Add(new SearchResult
                        {
                            BookId = Guid.Parse(doc.Get("BookId")),
                            Score = hits[i].Score
                        });
                    }

                    return results.ToArray();
                }
            }
        }
Example #6
0
        //public void Delete(string id)
        //{
        //    Analyzer _standardanalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        //    IndexWriter _writer = new IndexWriter(_directory, _standardanalyzer, IndexWriter.MaxFieldLength.UNLIMITED);

        //    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "id", _standardanalyzer);
        //    Query query = parser.Parse(id);

        //    _writer.DeleteDocuments(query);
        //}

        /// <summary>
        /// Deletes every document whose "id" field equals <paramref name="id"/> exactly.
        /// </summary>
        /// <param name="id">Value of the "id" term to delete; matched as a raw term, not analyzed.</param>
        public void Delete(string id)
        {
            // The writer must be committed and disposed: otherwise the deletion is never flushed
            // and the writer keeps holding the index while it stays alive.
            using (Analyzer standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
            using (IndexWriter writer = new IndexWriter(_directory, standardAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                writer.DeleteDocuments(new Term("id", id));
                writer.Commit();
            }
        }
Example #7
0
        /// <summary>
        /// Creates the on-disk index via <c>CanCreateLuceneIndexOnDisk</c>, then opens an index
        /// from the temp path and runs the query "lorem" against the "content" field,
        /// loading the "Id" and "content" fields of each hit. The test makes no assertions;
        /// it passes as long as nothing throws.
        /// </summary>
        public void CanQueryLuceneIndexCreatedOnDisk()
        {
            CanCreateLuceneIndexOnDisk();

            // NOTE(review): this opens the index at the root of the temp path; confirm that is
            // the same location CanCreateLuceneIndexOnDisk writes to.
            System.IO.DirectoryInfo di = new System.IO.DirectoryInfo(System.IO.Path.GetTempPath());

            // Reader and searcher are disposable as well; dispose them with the directory
            // so the test does not leave index files open.
            using (Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(di))
            using (Lucene.Net.Index.IndexReader ir = Lucene.Net.Index.IndexReader.Open(directory, true))
            using (Lucene.Net.Search.Searcher searcher = new Lucene.Net.Search.IndexSearcher(ir))
            using (Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
            {
                Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser(Version.LUCENE_30, "content", analyzer);
                Lucene.Net.Search.Query query = parser.Parse("lorem");
                Lucene.Net.Search.TopScoreDocCollector collector = Lucene.Net.Search.TopScoreDocCollector.Create(100, true);
                searcher.Search(query, collector);
                Lucene.Net.Search.ScoreDoc[] docs = collector.TopDocs().ScoreDocs;

                foreach (Lucene.Net.Search.ScoreDoc scoreDoc in docs)
                {
                    // Get the document that represents the search result.
                    Document document = searcher.Doc(scoreDoc.Doc);

                    // The values are read only to exercise stored-field retrieval.
                    var id = document.Get("Id");
                    var content = document.Get("content");
                }
            }
        }
Example #8
0
        /// <summary>
        /// Parses <paramref name="searchText"/> into a <see cref="Query"/> whose default field is
        /// <paramref name="fieldName"/>.
        /// </summary>
        /// <param name="fieldName">Default field for terms that do not name a field.</param>
        /// <param name="searchText">Text in query-parser syntax.</param>
        /// <param name="analyzer">Analyzer handed to the query parser.</param>
        /// <returns>The parsed query.</returns>
        private static Query GetQuery(string fieldName, string searchText, Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer)
        {
            return new QueryParser(Lucene.Net.Util.Version.LUCENE_30, fieldName, analyzer).Parse(searchText);
        }
Example #9
0
        /// <summary>
        /// Reads the adverts contained in <paramref name="filePath"/> and adds one index document
        /// per advert to the index at <c>_idxDir</c>, creating the index when none exists yet.
        /// </summary>
        /// <param name="filePath">Path of the advert data file passed to <c>AdDataStream</c>.</param>
        /// <remarks>
        /// When <c>_updateCallback</c> is set it is invoked once per added advert with
        /// "Added Document: " followed by the advert's "Title" value.
        /// </remarks>
        public void IndexFile(string filePath)
        {
            PropertyDescriptors descriptors = new PropertyDescriptors();
            descriptors.LoadData(System.Windows.Forms.Application.StartupPath + "\\PropertyDescriptors.xml");

            Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();

            // Create a fresh index only when the directory or the index inside it is missing.
            bool create = !(System.IO.Directory.Exists(_idxDir) && IndexReader.IndexExists(_idxDir));
            IndexWriter writer = new IndexWriter(_idxDir, analyzer, create);
            try
            {
                writer.SetUseCompoundFile(true);

                AdDataStream adStream = new AdDataStream(filePath);
                adStream.LoadData();
                foreach (Advert ad in adStream.FetchAd())
                {
                    Document doc = new Document();
                    foreach (string key in ad.GetDictionary().Keys)
                    {
                        string indexable = descriptors.GetIndexableFormat(descriptors[key], ad[key]);
                        doc.Add(Field.Text(key, indexable));
                    }

                    writer.AddDocument(doc);
                    if (_updateCallback != null)
                    {
                        _updateCallback("Added Document: " + ad["Title"]);
                    }
                }

                writer.Optimize();
            }
            finally
            {
                // Always close the writer, even when loading or indexing fails, so the index
                // is not left open (and locked) by a writer that nobody will ever close.
                writer.Close();
            }
        }
Example #10
0
		/// <summary>
		/// Indexes a single document into an in-memory directory, searches it for the
		/// term "text", and checks that exactly one hit comes back with the stored text.
		/// </summary>
		public virtual void  TestDemo_Renamed()
		{
			const System.String text = "This is the text to be indexed.";

			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

			// Store the index in memory:
			Directory directory = new RAMDirectory();
			// To store an index on disk, use this instead:
			//Directory directory = FSDirectory.open("/tmp/testindex");
			try
			{
				IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
				try
				{
					Document doc = new Document();
					doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
					iwriter.AddDocument(doc);
				}
				finally
				{
					iwriter.Close();
				}

				// Now search the index:
				IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
				try
				{
					// Parse a simple query that searches for "text":
					QueryParser parser = new QueryParser("fieldname", analyzer);
					Query query = parser.Parse("text");
					ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs;
					Assert.AreEqual(1, hits.Length);
					// Iterate through the results:
					for (int i = 0; i < hits.Length; i++)
					{
						Document hitDoc = isearcher.Doc(hits[i].doc);
						// Expected value goes first so a failure message reports the values the right way round.
						Assert.AreEqual(text, hitDoc.Get("fieldname"));
					}
				}
				finally
				{
					// Release the searcher even when an assertion above fails.
					isearcher.Close();
				}
			}
			finally
			{
				directory.Close();
			}
		}
        /// <summary>
        /// Opens an <c>MMapDirectory</c> at <c>storePathname</c>, opens a writer and a searcher
        /// on it, adds 1000 documents with a random "data" field, then closes everything and
        /// removes the directory.
        /// </summary>
        /// <remarks>
        /// Searcher, writer and the on-disk directory are cleaned up in <c>finally</c> blocks so
        /// a failing run does not leave an open writer or stale index files behind for later tests.
        /// </remarks>
        public virtual void  TestMmapIndex()
        {
            FSDirectory storeDirectory = new MMapDirectory(new System.IO.FileInfo(storePathname), null);

            // plan to add a set of useful stopwords, consider changing some of the
            // interior filters.
            StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable());

            try
            {
                IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
                try
                {
                    IndexSearcher searcher = new IndexSearcher(storePathname);
                    try
                    {
                        for (int dx = 0; dx < 1000; dx++)
                        {
                            System.String f = RandomField();
                            Document doc = new Document();
                            doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED));
                            writer.AddDocument(doc);
                        }
                    }
                    finally
                    {
                        searcher.Close();
                    }
                }
                finally
                {
                    writer.Close();
                }
            }
            finally
            {
                RmDir(new System.IO.FileInfo(storePathname));
            }
        }
Example #12
0
        /// <summary>
        /// Currently ignored through <c>Assert.Ignore</c>. The statements after the ignore call
        /// open an <c>MMapDirectory</c> at <c>storePathname</c>, add 1000 documents with a random
        /// "data" field through a writer while a read-only searcher is open, then close both and
        /// remove the directory.
        /// </summary>
        public virtual void  TestMmapIndex()
        {
            Assert.Ignore("Need to port tests, but we don't really support MMapDirectories anyway");

            FSDirectory mmapStore = new MMapDirectory(new System.IO.DirectoryInfo(storePathname), null);

            // The analyzer is given an empty stop-word set.
            StandardAnalyzer emptyStopSetAnalyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>());

            IndexWriter indexWriter = new IndexWriter(mmapStore, emptyStopSetAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            IndexSearcher readOnlySearcher = new IndexSearcher(mmapStore, true);

            // Add 1000 documents, each with one stored, analyzed "data" field.
            for (int remaining = 1000; remaining > 0; remaining--)
            {
                System.String randomValue = RandomField();
                Document randomDocument = new Document();
                randomDocument.Add(new Field("data", randomValue, Field.Store.YES, Field.Index.ANALYZED));
                indexWriter.AddDocument(randomDocument);
            }

            readOnlySearcher.Close();
            indexWriter.Close();
            RmDir(new System.IO.FileInfo(storePathname));
        }
		/// <summary>
		/// Checks that a <c>StandardAnalyzer</c> emits the token "toolong" by default and no
		/// longer emits it once the maximum token length has been set to 5.
		/// </summary>
		public virtual void  TestMaxTermLength2()
		{
			const System.String input = "ab cd toolong xy z";
			StandardAnalyzer analyzer = new StandardAnalyzer();

			// Before any limit is set, every word is expected as a token.
			System.String[] allTokens = { "ab", "cd", "toolong", "xy", "z" };
			AssertAnalyzesTo(analyzer, input, allTokens);

			analyzer.SetMaxTokenLength(5);

			// With the limit at 5 the seven-character token is expected to be missing.
			System.String[] shortTokens = { "ab", "cd", "xy", "z" };
			// Presumably position increments (2 where the over-long token was skipped);
			// confirm against the AssertAnalyzesTo overload.
			int[] expectedIncrements = { 1, 1, 2, 1 };
			AssertAnalyzesTo(analyzer, input, shortTokens, expectedIncrements);
		}
Example #14
0
        /// <summary>
        /// Rebuilds a sample index in the "index" folder: deletes any index left from a previous
        /// run, adds two batches of 1000 sample documents, commits, deletes every document whose
        /// "intValue" is 33, commits again, then prints the elapsed milliseconds and waits for a key.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const int documentsPerBatch = 1000;

            var sw = Stopwatch.StartNew();

            // Delete index from previous run
            var directoryName = "index";

            if (System.IO.Directory.Exists(directoryName))
            {
                System.IO.Directory.Delete(directoryName, true);
            }

            // Open the same folder that was just cleaned up, rather than repeating the literal.
            using (Directory directory = new MMapDirectory(directoryName))
            using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(LuceneVersion.LUCENE_48))
            {
                var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
                using (var ixw = new IndexWriter(directory, config))
                {
                    IList<Document> documents = new List<Document>(2 * documentsPerBatch);

                    for (int i = 0; i < documentsPerBatch; i++)
                    {
                        documents.Add(BuildSampleDocument("Hello world", 32, 64));
                    }

                    for (int i = 0; i < documentsPerBatch; i++)
                    {
                        documents.Add(BuildSampleDocument("Hello world 2", 33, 65));
                    }

                    ixw.AddDocuments(documents);
                    ixw.Commit();

                    // Removes the second batch: only those documents carry intValue == 33.
                    ixw.DeleteDocuments(NumericRangeQuery.NewInt32Range("intValue", 33, 33, true, true));
                    ixw.Commit();
                }
            }

            Console.WriteLine(sw.ElapsedMilliseconds);
            Console.ReadKey();
        }

        /// <summary>
        /// Builds one sample document with a fresh GUID "id" and the given content, stored int
        /// value and doc value; "notTokenized" and "intNotStoredValue" are the same for every document.
        /// </summary>
        private static Document BuildSampleDocument(string content, int intValue, long docValue)
        {
            return new Document
            {
                new StringField("id", Guid.NewGuid().ToString(), Field.Store.YES),
                new StringField("notTokenized", "Will not be tokenized", Field.Store.YES),
                new TextField("content", content, Field.Store.YES),
                new Int32Field("intValue", intValue, Field.Store.YES),
                new Int32Field("intNotStoredValue", 32, Field.Store.NO),
                new NumericDocValuesField("docValue", docValue)
            };
        }
        /// <summary>
        /// Lets the user pick a folder, indexes every file directly inside it into the
        /// "LuceneIndex" folder under the current directory (stemmed text with term vectors),
        /// then re-opens the index and writes every term of every document to the console.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void btnFolder_Click(object sender, EventArgs e)
        {
            string selectedPath;
            using (FolderBrowserDialog dia = new FolderBrowserDialog())
            {
                DialogResult res = dia.ShowDialog();
                if (res != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                selectedPath = dia.SelectedPath;
            }

            FSDirectory dir = FSDirectory.GetDirectory(Environment.CurrentDirectory + "\\LuceneIndex");
            //Lucene.Net.Store.RAMDirectory dir = new RAMDirectory();
            try
            {
                Lucene.Net.Analysis.Standard.StandardAnalyzer an = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
                IndexWriter wr = new IndexWriter(dir, an, true);
                try
                {
                    IStemmer stemmer = new EnglishStemmer();
                    DirectoryInfo diMain = new DirectoryInfo(selectedPath);
                    foreach (FileInfo fi in diMain.GetFiles())
                    {
                        Document doc = new Document();
                        doc.Add(new Field("title", fi.Name, Field.Store.YES, Field.Index.NO));
                        //doc.Add(new Field("text", File.ReadAllText(fi.FullName),Field.Store.YES, Field.Index.TOKENIZED,Field.TermVector.YES));
                        doc.Add(new Field("text", PerformStemming(stemmer, NLPToolkit.Tokenizer.TokenizeNow(File.ReadAllText(fi.FullName)).ToArray()), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
                        wr.AddDocument(doc);
                    }

                    wr.Optimize();
                    wr.Flush();
                }
                finally
                {
                    // Close the writer even when reading or indexing a file fails.
                    wr.Close();
                }

                // The directory stays open until the reader is done with it; it used to be
                // closed before IndexReader.Open was called on it.
                IndexReader reader = IndexReader.Open(dir);
                try
                {
                    for (int i = 0; i < reader.MaxDoc(); i++)
                    {
                        if (reader.IsDeleted(i))
                        {
                            continue;
                        }

                        Document doc = reader.Document(i);
                        // NOTE(review): no "docId" field is added to the documents above, so this
                        // lookup presumably yields null; confirm which field was intended.
                        String docId = doc.Get("docId");

                        // Guard against documents without term vectors (presumably returned as null).
                        TermFreqVector[] vectors = reader.GetTermFreqVectors(i);
                        if (vectors != null)
                        {
                            foreach (TermFreqVector vector in vectors)
                            {
                                foreach (string term in vector.GetTerms())
                                {
                                    Console.WriteLine(term);
                                }
                            }
                        }
                        // do something with docId here...
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                dir.Close();
            }
            //IndexSearcher search = new IndexSearcher(wr.GetReader());

            //MoreLikeThis mlt = new MoreLikeThis(wr.GetReader());
            //FileInfo fitarget = new FileInfo(@"C:\Users\peacemaker\Desktop\TestNoBitcoin\test.txt");
            //Query query = mlt.Like(fitarget);

            //var hits = search.Search(query, int.MaxValue);
            //foreach (ScoreDoc doc in hits.ScoreDocs)
            //{
            //    textBox1.Text += doc.Score + Environment.NewLine;
            //}
        }
Example #16
0
        /// <summary>
        /// Runs <paramref name="text"/> as a query (default field "text") against the index in
        /// <c>IndexDir</c> and returns at most <paramref name="max"/> results that the matching
        /// document provider authorizes for the user.
        /// </summary>
        /// <param name="text">Query text; an empty value or one ending in ":" yields an empty list.</param>
        /// <param name="max">Maximum number of results to return.</param>
        /// <param name="userId">User to authorize against; falls back to <c>Account.AuditId</c> when null or empty.</param>
        /// <returns>The authorized, formatted results in hit order.</returns>
        /// <exception cref="Exception">A hit has a document type for which no provider is registered.</exception>
        public static List <Models.SearchResult> Query(string text, int max = 8, string userId = null)
        {
            // Nothing to search for, or the text ends with ':'.
            if (string.IsNullOrEmpty(text) || text.EndsWith(":"))
            {
                return(new List <SearchResult>());
            }

            if (string.IsNullOrEmpty(userId))
            {
                userId = Account.AuditId;
            }

            var standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
            // "text" is only the default field; the query itself may name other fields.
            var queryParser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_29, "text", standardAnalyzer);
            var parsedQuery = queryParser.Parse(text);

            using (var indexDirectory = FSDirectory.Open(new DirectoryInfo(IndexDir)))
            using (var indexSearcher = new IndexSearcher(indexDirectory, true))
            {
                // Results are filtered after the search, so ask for twice as many hits as
                // needed to leave room for entries that get rejected.
                var hitCollector = TopScoreDocCollector.create(max * 2, true);
                indexSearcher.Search(parsedQuery, hitCollector);

                var authorizedResults = new List <Models.SearchResult>();

                foreach (var hit in hitCollector.TopDocs().ScoreDocs)
                {
                    var searchDocument = new Models.SearchDocument(indexSearcher.Doc(hit.doc));
                    var documentProvider = GetDocumentProvider(searchDocument.Type);
                    if (documentProvider == null)
                    {
                        throw new Exception(string.Format("Formatter for type {0} not found", searchDocument.Type));
                    }

                    if (documentProvider.IsAuthorized(searchDocument, userId))
                    {
                        authorizedResults.Add(documentProvider.FormatResult(searchDocument));
                    }

                    // Stop as soon as enough authorized results have been collected.
                    if (authorizedResults.Count >= max)
                    {
                        break;
                    }
                }

                return(authorizedResults);
            }
        }
        /// <summary>
        /// Creates, uses and disposes 100 analyzer / RAMDirectory / IndexWriter sets in parallel,
        /// forces a garbage collection, and asserts that none of the weak references recorded in
        /// <c>CloseableThreadLocalProfiler.Instances</c> still has a live target.
        /// </summary>
        public void TestMemLeakage()
        {
            // Switch the profiler flag on for the duration of the test; it is reset before the assertion.
            CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler = true;

            int LoopCount = 100;
            Analyzer[] analyzers = new Analyzer[LoopCount];
            RAMDirectory[] dirs = new RAMDirectory[LoopCount];
            IndexWriter[] indexWriters = new IndexWriter[LoopCount];

            // Phase 1: build one analyzer, in-memory directory and writer per slot, in parallel.
            System.Threading.Tasks.Parallel.For(0, LoopCount, (i) =>
                                                                  {
                                                                      analyzers[i] = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Version.LUCENE_CURRENT);
                                                                      dirs[i] = new RAMDirectory();
                                                                      indexWriters[i] = new IndexWriter(dirs[i], analyzers[i], true, IndexWriter.MaxFieldLength.UNLIMITED);
                                                                  });

            // Phase 2: add a single analyzed (not stored) document through every writer.
            System.Threading.Tasks.Parallel.For(0, LoopCount, (i) =>
                                                                  {
                                                                      Document document = new Document();
                                                                      document.Add(new Field("field", "some test", Field.Store.NO, Field.Index.ANALYZED));
                                                                      indexWriters[i].AddDocument(document);
                                                                  });

            // Phase 3: dispose every analyzer and writer.
            System.Threading.Tasks.Parallel.For(0, LoopCount, (i) =>
                                                                  {
                                                                      analyzers[i].Dispose();
                                                                      indexWriters[i].Dispose();
                                                                  });

            // Phase 4: run one term query against each directory; the result itself is not inspected.
            System.Threading.Tasks.Parallel.For(0, LoopCount, (i) =>
                                                                  {
                                                                      IndexSearcher searcher = new IndexSearcher(dirs[i]);
                                                                      TopDocs d = searcher.Search(new TermQuery(new Term("field", "test")), 10);
                                                                      searcher.Close();
                                                                  });

            // Phase 5: dispose the directories.
            System.Threading.Tasks.Parallel.For(0, LoopCount, (i) => dirs[i].Dispose());

            // Force a full collection and let finalizers run before counting survivors.
            GC.Collect(GC.MaxGeneration);
            GC.WaitForPendingFinalizers();

            // Count the tracked weak references whose target has not been collected.
            int aliveObjects = 0;
            foreach (WeakReference w in CloseableThreadLocalProfiler.Instances)
            {
                object o = w.Target;
                if (o != null) aliveObjects++;
            }

            CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler = false;

            // Any surviving target counts as a leak.
            Assert.AreEqual(0, aliveObjects);
        }
Example #18
0
        /// <summary>
        /// Replaces the index entry for <paramref name="item"/>: the existing entry is removed via
        /// <c>DeleteIndex</c> and, unless the item is flagged as deleted, a new document is added.
        /// </summary>
        /// <param name="item">Item to index; its ID, Title, Description, DocumentDescription and URL are stored.</param>
        /// <param name="indexPath">Folder that contains the index.</param>
        /// <returns>
        /// <c>true</c> when the entry was removed (deleted item) or re-added; <c>false</c> when any
        /// exception occurred. Exceptions are logged through <c>ErrorLog.WriteLog</c> and not rethrown.
        /// </returns>
        public static bool SaveToIndex(SiteSearchItem item, string indexPath)
        {
            try
            {
                // Delete first so the item never ends up in the index twice.
                DeleteIndex(item, indexPath);

                if (item.IsDeleted)
                {
                    return(true);
                }

                // Directory, analyzer and writer are released by the using blocks on every path,
                // so a failure while building or adding the document no longer leaves the
                // writer open for the next call.
                using (var indexDirectory = FSDirectory.Open(new DirectoryInfo(indexPath)))
                using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
                using (var writer = new Lucene.Net.Index.IndexWriter(indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    var doc = new Document();
                    doc.Add(new Field("ID", item.ID, Field.Store.YES, Field.Index.NOT_ANALYZED));

                    // Title gets the highest boost.
                    var titleField = new Field("Title", item.Title, Field.Store.YES, Field.Index.ANALYZED);
                    titleField.Boost = 1.3f;
                    doc.Add(titleField);

                    // Description gets a smaller boost.
                    var descriptionField = new Field("Description", item.Description, Field.Store.YES, Field.Index.ANALYZED);
                    descriptionField.Boost = 1.1f;
                    doc.Add(descriptionField);

                    // No boost is set on the document description.
                    doc.Add(new Field("DocumentDescription", item.DocumentDescription, Field.Store.YES, Field.Index.ANALYZED));

                    doc.Add(new Field("URL", item.URL, Field.Store.YES, Field.Index.NOT_ANALYZED));

                    writer.AddDocument(doc);
                }

                return(true);
            }
            catch (Exception ex)
            {
                ErrorLog.WriteLog("SiteSearchService", "SaveToIndex", ex, string.Empty);
                return(false);
            }
        }
Example #19
0
        /// <summary>
        /// Walk-through of several Lucene.Net features: constructing the built-in analyzers,
        /// a per-field analyzer wrapper, a reader obtained from the writer, a boosted multi-field
        /// query, projecting hits into <c>Book</c> objects, and finally optimize / commit / delete-all
        /// on the writer.
        /// </summary>
        /// <remarks>
        /// Nothing created here is closed by this method, and the <c>Book</c> projection is a
        /// deferred query that this method never enumerates.
        /// </remarks>
        public void Code()
        {
            // One instance of each analyzer; only the keyword and standard analyzers are used below.
            Analyzer keywordAnalyzer    = new KeywordAnalyzer();
            Analyzer simpleAnalyzer     = new Lucene.Net.Analysis.SimpleAnalyzer();
            Analyzer stopAnalyzer       = new Lucene.Net.Analysis.StopAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            Analyzer whitespaceAnalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            Analyzer standardAnalyzer   = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            // Standard analysis by default, keyword analysis for the two name fields.
            var perFieldAnalyzer = new Lucene.Net.Analysis.PerFieldAnalyzerWrapper(standardAnalyzer);
            perFieldAnalyzer.AddAnalyzer("firstname", keywordAnalyzer);
            perFieldAnalyzer.AddAnalyzer("lastname", keywordAnalyzer);

            IndexWriter indexWriter = new IndexWriter(_directory, perFieldAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            IndexReader writerReader = indexWriter.GetReader();
            IndexSearcher indexSearcher = new IndexSearcher(writerReader);

            // Search three fields at once; "text" and "title" carry explicit boosts.
            string[] searchFields = new[] { "text", "title", "author" };
            var fieldBoosts = new Dictionary <string, float>
            {
                { "text", 2.0f },
                { "title", 1.5f }
            };
            QueryParser multiFieldParser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, searchFields, standardAnalyzer, fieldBoosts);
            Query parsedQuery = multiFieldParser.Parse("lucene is great");

            TopDocs topDocs = indexSearcher.Search(parsedQuery, 1000);

            // Both projections are lazy: no document is loaded until they are enumerated.
            IEnumerable <Document> matchedDocuments =
                from hit in topDocs.ScoreDocs
                select indexSearcher.Doc(hit.Doc);

            var books =
                from document in matchedDocuments
                select new Book
                {
                    Text   = document.Get("text"),
                    Title  = document.Get("title"),
                    Author = document.Get("author"),
                    Length = Int32.Parse(document.Get("length"))
                };

            indexWriter.Optimize();
            indexWriter.Commit();
            indexWriter.DeleteAll();
        }
Example #20
0
        /// <summary>
        /// Creates a fresh index in the folder named by the "LuceneIndexStoragePath" app setting
        /// (creating the folder when missing) and adds one document per hard-coded actor name,
        /// echoing each name to the console.
        /// </summary>
        /// <remarks>
        /// The <c>analyzer</c> and <c>writer</c> members are assigned here and are disposed again
        /// before the method returns, whether or not indexing succeeded. Exceptions from indexing
        /// propagate to the caller.
        /// </remarks>
        private void StartLuceneIndexCreateProcess()
        {
            string luceneIndexStoragePath = ConfigurationManager.AppSettings["LuceneIndexStoragePath"];

            if (!System.IO.Directory.Exists(luceneIndexStoragePath))
            {
                System.IO.Directory.CreateDirectory(luceneIndexStoragePath);
            }

            analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(luceneIndexStoragePath));
            writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);
            try
            {
                // Names used to populate the index ("Johnny Depp" appears twice, as in the source data).
                List <string> actorsList = new List <string>
                {
                    "Johnny Depp",
                    "Robert Downey Jr.",
                    "Johnny Depp",
                    "Tom Cruise",
                    "Brad Pitt",
                    "Tom Hanks",
                    "Denzel Washington",
                    "Russell Crowe",
                    "Kate Winslet",
                    "Christian Bale",
                    "Hugh Jackman",
                    "Will Smith",
                    "Sean Connery"
                };

                foreach (var item in actorsList)
                {
                    Console.WriteLine(item);
                    writer.AddDocument(CreateDocument(item));
                }

                // Optimize only after every document was added; running it from the finally
                // block could throw again and hide the original indexing error.
                writer.Optimize();
            }
            finally
            {
                try
                {
                    writer.Dispose();
                }
                finally
                {
                    // Disposed exactly once.
                    analyzer.Dispose();

                    // Clear a write lock only if one is still present after the writer was
                    // disposed, instead of removing it while the writer is still open.
                    if (Lucene.Net.Index.IndexWriter.IsLocked(directory))
                    {
                        Lucene.Net.Index.IndexWriter.Unlock(directory);
                    }

                    directory.Dispose();
                }
            }
        }
Example #21
0
        /// <summary>
        /// Creates a new index in the "lucene_index" folder under the temp path and commits two
        /// documents to it, each with a stored, non-analyzed "Id" field and a stored, analyzed
        /// "content" field that keeps term vectors with positions and offsets.
        /// </summary>
        public void CanCreateLuceneIndexOnDisk()
        {
            string indexPath = System.IO.Path.Combine(System.IO.Path.GetTempPath(), "lucene_index");
            System.IO.DirectoryInfo indexFolder = new System.IO.DirectoryInfo(indexPath);

            using (Lucene.Net.Store.Directory indexStore = Lucene.Net.Store.FSDirectory.Open(indexFolder))
            using (Lucene.Net.Analysis.Analyzer standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
            {
                string placeholderText = "Lorem Ipsum è un testo segnaposto .....";

                using (Lucene.Net.Index.IndexWriter indexWriter = new Lucene.Net.Index.IndexWriter(indexStore, standardAnalyzer, true, new IndexWriter.MaxFieldLength(4096)))
                {
                    // First document: an HTML anchor as the id, a single word as content.
                    Document anchorDocument = new Document();
                    anchorDocument.Add(new Field("Id","<a title = \"test\" href = \"http://www.codewrecks.com/blog/index.php/2007/09/03/test/\"> test </a >.", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
                    anchorDocument.Add(new Field("content", "test", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                    indexWriter.AddDocument(anchorDocument);

                    // Second document: the placeholder text, identified by its hash code.
                    string placeholderId = placeholderText.GetHashCode().ToString(CultureInfo.InvariantCulture);
                    Document placeholderDocument = new Document();
                    placeholderDocument.Add(new Field("Id", placeholderId, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
                    placeholderDocument.Add(new Field("content", placeholderText, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                    indexWriter.AddDocument(placeholderDocument);

                    indexWriter.Commit();
                }
            }
        }
		/// <summary>
		/// Creates an index at <c>storePathname</c>, opens a searcher on the same path, adds 1000
		/// documents with a random "data" field, then closes both and removes the index directory.
		/// The searcher, the writer and the on-disk directory are cleaned up even when indexing fails.
		/// </summary>
		public virtual void  TestMmapIndex()
		{
			FSDirectory storeDirectory = FSDirectory.GetDirectory(storePathname);
			
			// plan to add a set of useful stopwords, consider changing some of the
			// interior filters.
			StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable());
			// TODO: something about lock timeouts and leftover locks.
			IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true);
			try
			{
				IndexSearcher searcher = new IndexSearcher(storePathname);
				try
				{
					for (int dx = 0; dx < 1000; dx++)
					{
						System.String f = RandomField();
						Document doc = new Document();
						doc.Add(new Field("data", f, Field.Store.YES, Field.Index.TOKENIZED));
						writer.AddDocument(doc);
					}
				}
				finally
				{
					// Same order as before: the searcher is closed ahead of the writer.
					searcher.Close();
				}
			}
			finally
			{
				try
				{
					writer.Close();
				}
				finally
				{
					// Always remove the on-disk index so a failed run leaves nothing behind.
					RmDir(new System.IO.FileInfo(storePathname));
				}
			}
		}
		/// <summary>
		/// Runs a multi-term query ("JF? or Kenned*") without rewriting it, prints the best
		/// fragments for every hit, and asserts that numHighlights is still zero afterwards.
		/// </summary>
		public virtual void  TestUnRewrittenQuery()
		{
			const int maxFragments = 3;
			
			searcher = new IndexSearcher(ramDir);
			Analyzer standardAnalyzer = new StandardAnalyzer();
			
			QueryParser queryParser = new QueryParser(FIELD_NAME, standardAnalyzer);
			Query unrewritten = queryParser.Parse("JF? or Kenned*");
			System.Console.Out.WriteLine("Searching with primitive query");
			// Deliberately NOT calling Rewrite on the query before searching.
			Hits matches = searcher.Search(unrewritten);
			
			// This instance is handed to the highlighter as its first constructor argument.
			Highlighter fragmentHighlighter = new Highlighter(this, new QueryScorer(unrewritten));
			fragmentHighlighter.SetTextFragmenter(new SimpleFragmenter(40));
			
			int hitIndex = 0;
			while (hitIndex < matches.Length())
			{
				System.String storedText = matches.Doc(hitIndex).Get(FIELD_NAME);
				TokenStream tokens = standardAnalyzer.TokenStream(FIELD_NAME, new System.IO.StringReader(storedText));
				
				System.String fragments = fragmentHighlighter.GetBestFragments(tokens, storedText, maxFragments, "...");
				System.Console.Out.WriteLine(fragments);
				hitIndex++;
			}
			
			// A multi-term query that was never rewritten is expected to yield no highlights.
			Assert.IsTrue(numHighlights == 0, "Failed to find correct number of highlights " + numHighlights + " found");
		}
Example #24
0
		/// <summary>
		/// Collects the criteria entered in <c>MasterPanelPesquisa</c> into a
		/// <c>NivelDocumentalSearch</c>, sends it to the search server and hands the returned
		/// IDs to <c>PesquisaList1</c>.
		/// </summary>
		/// <param name="Conn">Not used by this method.</param>
		/// <returns>
		/// 0 when a field failed validation (a message box lists the offending fields);
		/// -1 when the server search was skipped because no criterion besides the structure
		/// filter was set; otherwise the number of IDs returned by the search server
		/// (0 if the search itself failed).
		/// </returns>
		private long ExecuteSearch(IDbConnection Conn)
		{
            List<string> resultadosDaPesquisa = new List<string>();
            long countResults = 0;
            try
            {
                // TODO: Consider removing the Lucene.Net dependency used for field validation: not all fields are validated....
                NivelDocumentalSearch ndSearch = new NivelDocumentalSearch();
                Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                QueryParser qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, string.Empty, analyzer);
                qp.AllowLeadingWildcard = true;
                // Accumulates one line per field whose text was rejected.
                StringBuilder errorMessage = new StringBuilder();

                if (Helper.IsValidTxtID(qp, MasterPanelPesquisa.txtID.Text))
                    ndSearch.Id = MasterPanelPesquisa.txtID.Text;
                else
                    errorMessage.AppendLine("Identificador: " + MasterPanelPesquisa.txtID.Text);

                ndSearch.TextoLivre = Helper.AddFieldToSearch(qp, "Texto Livre", MasterPanelPesquisa.txtPesquisaSimples.Text, ref errorMessage);

                if (MasterPanelPesquisa.cbModulo.Items.Count == 1)
                    ndSearch.Modulo = 1;
                else
                    ndSearch.Modulo = MasterPanelPesquisa.cbModulo.SelectedIndex;

                ndSearch.CodigoParcial = Helper.AddFieldToSearch(qp, "Código Parcial", MasterPanelPesquisa.txtCodigoParcial.Text, ref errorMessage);
                ndSearch.Designacao = Helper.AddFieldToSearch(qp, "Designação", MasterPanelPesquisa.txtDesignacao.Text, ref errorMessage);
                ndSearch.Autor = Helper.AddFieldToSearch(qp, "Autor", MasterPanelPesquisa.txtAutor.Text, ref errorMessage);
                ndSearch.EntidadeProdutora = Helper.AddFieldToSearch(qp, "Entidade Produtora", MasterPanelPesquisa.txtEntidadeProdutora.Text, ref errorMessage);

                // Only restrict by level when the selection is a strict subset of the available levels.
                if (MasterPanelPesquisa.lstNiveisDocumentais.SelectedItems.Count != MasterPanelPesquisa.lstNiveisDocumentais.Items.Count)
                {
                    List<string> str = new List<string>();
                    foreach (DataRowView item in MasterPanelPesquisa.lstNiveisDocumentais.SelectedItems)
                        str.Add(item.Row["Designacao"].ToString().ToLower());

                    ndSearch.NiveisDocumentais = str.ToArray();
                    ndSearch.NiveisDocumentaisOP = 0;
                }

                if (MasterPanelPesquisa.cdbDataInicio.Checked)
                    ndSearch.DataProducaoInicio = MasterPanelPesquisa.cdbDataInicio.GetStandardMaskDate.ToString("yyyyMMdd");

                if (MasterPanelPesquisa.cdbDataFim.Checked)
                    ndSearch.DataProducaoFim = MasterPanelPesquisa.cdbDataFim.GetStandardMaskDate.ToString("yyyyMMdd");

                if (MasterPanelPesquisa.cdbInicioDoFim.Checked)
                    ndSearch.DataProducaoInicioDoFim = MasterPanelPesquisa.cdbInicioDoFim.GetStandardMaskDate.ToString("yyyyMMdd");

                if (MasterPanelPesquisa.cdbFimDoFim.Checked)
                    ndSearch.DataProducaoFimDoFim = MasterPanelPesquisa.cdbFimDoFim.GetStandardMaskDate.ToString("yyyyMMdd");

                ndSearch.TipologiaInformacional = Helper.AddFieldToSearch(qp, "Tipologia Informacional", MasterPanelPesquisa.txtTipologiaInformacional.Text, ref errorMessage);
                ndSearch.TermosIndexacao = Helper.AddFieldToSearch(qp, "Indexação", MasterPanelPesquisa.txtIndexacao.Text, ref errorMessage);
                ndSearch.ConteudoInformacional = Helper.AddFieldToSearch(qp, "Conteúdo Informacional", MasterPanelPesquisa.txtConteudoInformacional.Text, ref errorMessage);
                ndSearch.Notas = Helper.AddFieldToSearch(qp, "Notas", MasterPanelPesquisa.txtNotas.Text, ref errorMessage);
                ndSearch.Cota = Helper.AddFieldToSearch(qp, "Cota", Helper.EscapeSpecialCharactersCotaDocumento(MasterPanelPesquisa.txtCota.Text.ToLower()), ref errorMessage);
                ndSearch.Agrupador = Helper.AddFieldToSearch(qp, "Agrupador", MasterPanelPesquisa.txtAgrupador.Text, ref errorMessage);
                ndSearch.SoComODs = string.Empty;
                ndSearch.SoComODsPub = string.Empty;
                ndSearch.SoComODsNaoPub = string.Empty;
                // Combo index 1, 2 or 3 selects which of the three "objetos*" filters is set to "sim".
                switch (MasterPanelPesquisa.cbODs.SelectedIndex)
                {
                    case 1:
                        ndSearch.SoComODs = Helper.AddFieldToSearch(qp, "objetos", "sim", ref errorMessage);
                        break;
                    case 2:
                        ndSearch.SoComODsPub = Helper.AddFieldToSearch(qp, "objetosPublicados", "sim", ref errorMessage);
                        break;
                    case 3:
                        ndSearch.SoComODsNaoPub = Helper.AddFieldToSearch(qp, "objetosNaoPublicados", "sim", ref errorMessage);
                        break;
                }
                if (MasterPanelPesquisa.chkFormaSuporte.Checked)
                {
                    StringBuilder str = new StringBuilder();
                    foreach (DataRowView item in MasterPanelPesquisa.lstFormaSuporte.SelectedItems)
                    {
                        str.Append(item.Row["Designacao"].ToString());
                        str.Append(" ");
                    }
                    ndSearch.SuporteEAcondicionamento = BreakStrings(str.ToString().ToLower());
                    ndSearch.SuporteEAcondicionamentoOP = MasterPanelPesquisa.cbFormaSuporte.SelectedIndex;
                }

                if (MasterPanelPesquisa.chkMaterialSuporte.Checked)
                {
                    StringBuilder str = new StringBuilder();
                    foreach (DataRowView item in MasterPanelPesquisa.lstMaterialSuporte.SelectedItems)
                    {
                        str.Append(item.Row["Designacao"].ToString());
                        str.Append(" ");
                    }
                    ndSearch.MaterialDeSuporte = BreakStrings(str.ToString().ToLower());
                    ndSearch.MaterialDeSuporteOP = MasterPanelPesquisa.cbMaterialSuporte.SelectedIndex;
                }

                if (MasterPanelPesquisa.chkTecnicaRegisto.Checked)
                {
                    StringBuilder str = new StringBuilder();
                    foreach (DataRowView item in MasterPanelPesquisa.lstTecnicaRegisto.SelectedItems)
                    {
                        str.Append(item.Row["Designacao"].ToString());
                        str.Append(" ");
                    }
                    ndSearch.TecnicaRegisto = BreakStrings(str.ToString().ToLower());
                    ndSearch.TecnicaRegistoOP = MasterPanelPesquisa.cbTecnicaRegisto.SelectedIndex;
                }

                if (MasterPanelPesquisa.chkEstadoConservacao.Checked)
                {
                    StringBuilder str = new StringBuilder();
                    foreach (DataRowView item in MasterPanelPesquisa.lstEstadoConservacao.SelectedItems)
                    {
                        str.Append(item.Row["Designacao"].ToString());
                        str.Append(" ");
                    }
                    ndSearch.EstadoConservacao = BreakStrings(str.ToString().ToLower());
                    ndSearch.EstadoConservacaoOP = 0;
                }

                #region Licencas de obra
                // Each criterion below is copied only when the panel returns a non-empty value.
                if (MasterPanelPesquisa.get_Nome_LicencaObraRequerentes().Length > 0)
                    ndSearch.Nome_LicencaObraRequerentes = MasterPanelPesquisa.get_Nome_LicencaObraRequerentes();

                if (MasterPanelPesquisa.get_LocalizacaoObra_Actual().Length > 0)
                    ndSearch.LocalizacaoObra_Actual = MasterPanelPesquisa.get_LocalizacaoObra_Actual();
                if (MasterPanelPesquisa.get_NumPolicia_Actual().Length > 0)
                    ndSearch.NumPolicia_Actual = MasterPanelPesquisa.get_NumPolicia_Actual();

                if (MasterPanelPesquisa.get_LocalizacaoObra_Antiga().Length > 0)
                    ndSearch.LocalizacaoObra_Antiga = MasterPanelPesquisa.get_LocalizacaoObra_Antiga();
                if (MasterPanelPesquisa.get_NumPolicia_Antigo().Length > 0)
                    ndSearch.NumPolicia_Antigo = MasterPanelPesquisa.get_NumPolicia_Antigo();

                if (MasterPanelPesquisa.get_TipoObra().Length > 0)
                    ndSearch.LicencaObra_TipoObra = MasterPanelPesquisa.get_TipoObra();

                if (MasterPanelPesquisa.get_TecnicoObra().Length > 0)
                    ndSearch.Termo_LicencaObraTecnicoObra = MasterPanelPesquisa.get_TecnicoObra();

                if (MasterPanelPesquisa.get_CodigosAtestadoHabitabilidade().Length > 0)
                    ndSearch.CodigosAtestadoHabitabilidade = MasterPanelPesquisa.get_CodigosAtestadoHabitabilidade();

                if (MasterPanelPesquisa.get_Datas_LicencaObraDataLicencaConstrucao_Inicio().Length > 0)
                    ndSearch.Datas_LicencaObraDataLicencaConstrucao_Inicio = MasterPanelPesquisa.get_Datas_LicencaObraDataLicencaConstrucao_Inicio();
                if (MasterPanelPesquisa.get_Datas_LicencaObraDataLicencaConstrucao_Fim().Length > 0)
                    ndSearch.Datas_LicencaObraDataLicencaConstrucao_Fim = MasterPanelPesquisa.get_Datas_LicencaObraDataLicencaConstrucao_Fim();

                if (MasterPanelPesquisa.get_PH_checked())
                    ndSearch.LicencaObra_PHSimNao = MasterPanelPesquisa.get_PH_checked();

                #endregion

                if (errorMessage.Length > 0)
                {
                    MessageBox.Show("O(s) campo(s) seguinte(s) tem(êm) valor(es) incorrecto(s): " +
                        System.Environment.NewLine +
                        errorMessage.ToString());

                    return 0;
                }

                // Skip the search-server query when the advanced search defines no criterion other than a level picked from the structure.
                if (!ndSearch.IsCriteriaEmpty() || !MasterPanelPesquisa.chkEstruturaArquivistica.Checked)
                    resultadosDaPesquisa.AddRange(SearchImpl.search(ndSearch.ToString(), "nivelDocumental", SessionHelper.GetGisaPrincipal().TrusteeUserOperator.ID.ToString()));
                else
                {
                    // null (rather than an empty list) marks "server search skipped"; -1 is the matching count sentinel.
                    resultadosDaPesquisa = null;
                    countResults = -1;
                }
            }
            catch (Exception)
            {
                // Any failure above is reported as a search-server connection error; execution continues with whatever was collected.
                MessageBox.Show("Erro na conexão com o servidor de pesquisa", "Gisa", MessageBoxButtons.OK, MessageBoxIcon.Warning);
            }


            PesquisaList1.SearchServerIDs = resultadosDaPesquisa;
            PesquisaList1.UserID = SessionHelper.GetGisaPrincipal().TrusteeUserOperator.ID;
            PesquisaList1.SoDocExpirados = MasterPanelPesquisa.chkApenasDataElimExp.Checked;
            PesquisaList1.NewSearch = true;

            if (MasterPanelPesquisa.chkEstruturaArquivistica.Checked && MasterPanelPesquisa.cnList.SelectedNivelRow != null)
                PesquisaList1.IDNivelEstrutura = MasterPanelPesquisa.cnList.SelectedNivelRow.ID;
            else
            {
                // The list is null when the server search was skipped (structure filter checked, no
                // other criterion). If no structure row is selected either, reading .Count would
                // throw a NullReferenceException, so keep the -1 sentinel in that case.
                if (resultadosDaPesquisa != null)
                    countResults = resultadosDaPesquisa.Count;
                PesquisaList1.IDNivelEstrutura = null;
            }

            PesquisaList1.Focus();

            return countResults;
		}
Example #25
0
 /// <summary>
 /// Indexes 30 documents that each carry a "noTf" field (term frequencies and positions
 /// omitted) and a regular "tf" field, runs four term queries and one boolean query through
 /// counting collectors, and asserts that CountingHitCollector.GetCount() is 15 after the
 /// boolean query.
 /// </summary>
 public virtual void  TestBasic()
 {
     const System.String term = "term";
     
     Directory directory = new MockRAMDirectory();
     Analyzer standardAnalyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
     IndexWriter indexWriter = new IndexWriter(directory, standardAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
     indexWriter.MergeFactor = 2;
     indexWriter.SetMaxBufferedDocs(2);
     indexWriter.SetSimilarity(new SimpleSimilarity());
     
     // Every document repeats the term once more than the previous one.
     System.Text.StringBuilder growingText = new System.Text.StringBuilder(265);
     int docIndex = 0;
     while (docIndex < 30)
     {
         growingText.Append(term);
         growingText.Append(" ");
         System.String content = growingText.ToString();
         bool isEven = docIndex % 2 == 0;
         
         // Odd documents get the extra "notf" token, even documents get the extra "tf" token.
         Field noTfField = new Field("noTf", isEven ? content : content + " notf", Field.Store.NO, Field.Index.ANALYZED);
         noTfField.OmitTermFreqAndPositions = true;
         Field tfField = new Field("tf", isEven ? content + " tf" : content, Field.Store.NO, Field.Index.ANALYZED);
         
         Document document = new Document();
         document.Add(noTfField);
         document.Add(tfField);
         indexWriter.AddDocument(document);
         docIndex++;
     }
     
     indexWriter.Optimize();
     // Closing flushes the writer before the index is checked.
     indexWriter.Close();
     _TestUtil.CheckIndex(directory);
     
     // Verify the index.
     Searcher indexSearcher = new IndexSearcher(directory, true);
     indexSearcher.Similarity = new SimpleSimilarity();
     
     TermQuery noTfTermQuery = new TermQuery(new Term("noTf", term));
     TermQuery tfTermQuery = new TermQuery(new Term("tf", term));
     TermQuery noTfMarkerQuery = new TermQuery(new Term("noTf", "notf"));
     TermQuery tfMarkerQuery = new TermQuery(new Term("tf", "tf"));
     
     indexSearcher.Search(noTfTermQuery, new AnonymousClassCountingHitCollector(this));
     indexSearcher.Search(tfTermQuery, new AnonymousClassCountingHitCollector1(this));
     indexSearcher.Search(noTfMarkerQuery, new AnonymousClassCountingHitCollector2(this));
     indexSearcher.Search(tfMarkerQuery, new AnonymousClassCountingHitCollector3(this));
     
     // Both clauses are required: noTf:term AND tf:tf.
     BooleanQuery bothRequired = new BooleanQuery();
     bothRequired.Add(noTfTermQuery, Occur.MUST);
     bothRequired.Add(tfMarkerQuery, Occur.MUST);
     
     indexSearcher.Search(bothRequired, new AnonymousClassCountingHitCollector4(this));
     Assert.IsTrue(15 == CountingHitCollector.GetCount());
     
     indexSearcher.Close();
     directory.Close();
 }
        /// <summary>
        /// Runs the query typed into <c>QueryInputBox</c> against the Lucene index at
        /// "Canon\index", builds a <c>ResultSet</c> (book name, score, best text fragment) for up
        /// to the first ten hits, and shows them in a <c>ShowSearchResults</c> dialog.
        /// Any exception is reported to the user in a message box.
        /// </summary>
        private void Search()
        {
            try
            {
                SearchProgressBar.Maximum = 11;
                ProgressLabel.Text = "Progress: Initialize Search ...";

                String line = QueryInputBox.Text;
                // The previous check compared Length to -1, which can never be true; an empty
                // query is what it was meant to reject. Checked before the index is opened so
                // nothing needs closing on this path.
                if (line.Length == 0)
                    return;

                ArrayList resultList = new ArrayList();
                Searcher searcher = new IndexSearcher(@"Canon\index");
                try
                {
                    Analyzer analyzer = new StandardAnalyzer();

                    ProgressLabel.Text = "Progress: Parsing Query ...";
                    Query query = QueryParser.Parse(line, "contents", analyzer);

                    Hits hits = searcher.Search(query);
                    SearchProgressBar.Increment(1);
                    ProgressLabel.Text = "Progress: Searched. Analyzing results ...";

                    Highlighter highlighter = new Highlighter(new QueryScorer(query));
                    highlighter.SetTextFragmenter(new SimpleFragmenter(80));
                    int maxNumFragmentsRequired = 1;

                    // Never read past the hits that actually exist: with fewer than ten hits the
                    // old fixed bound of 10 made hits.Doc(i) fail and no results were shown at all.
                    int hitsToShow = System.Math.Min(10, hits.Length());
                    for (int i = 0; i < hitsToShow; i++)
                    {
                        SearchProgressBar.Increment(1);
                        ProgressLabel.Text = "Progress: Analyzing hit " + (i + 1).ToString();

                        // get the document from index
                        Document doc = hits.Doc(i);
                        ResultSet a = new ResultSet();
                        a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\","");
                        a.Score = hits.Score(i);
                        a.numberOfHits = hits.Length();

                        // The text is not stored in the index, so it is extracted from the
                        // archive into a temporary .htm file under the Work folder.
                        string path = doc.Get("path");
                        string name = GetInternalName(path);
                        PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name);
                        path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm";
                        string plainText = "";
                        try
                        {
                            using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default))
                            {
                                plainText = parseHtml(sr.ReadToEnd());
                            }
                        }
                        finally
                        {
                            // Remove the temporary extract even when reading or parsing fails.
                            if (File.Exists(path))
                                File.Delete(path);
                        }

                        TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText));
                        a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "...");
                        resultList.Add(a);
                    }
                }
                finally
                {
                    // Close the index even when parsing, searching or highlighting throws.
                    searcher.Close();
                }

                SearchProgressBar.Value = 0;
                ssr = new ShowSearchResults(resultList);
                ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook);
                ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow);
                this.Hide();
                ssr.ShowDialog();
            }
            catch (System.Exception e)
            {
                MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
		// Assigns a new StandardAnalyzer, constructed with Version.LUCENE_CURRENT, to the
		// analyzer member.
		private void  InitBlock()
		{
			analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
		}
Example #28
0
 /// <summary>
 /// Adds the same document 30 times, its single field "f1" having term frequencies and
 /// positions omitted, and calls AssertNoPrx on the directory both after the commit and
 /// after optimizing and closing the writer.
 /// </summary>
 public virtual void  TestNoPrxFile()
 {
     Directory directory = new MockRAMDirectory();
     Analyzer standardAnalyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
     IndexWriter indexWriter = new IndexWriter(directory, standardAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
     indexWriter.SetMaxBufferedDocs(3);
     indexWriter.MergeFactor = 2;
     indexWriter.UseCompoundFile = false;
     
     Field omittedField = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
     omittedField.OmitTermFreqAndPositions = true;
     Document document = new Document();
     document.Add(omittedField);
     
     // The very same Document instance is added every time.
     int remaining = 30;
     while (remaining > 0)
     {
         indexWriter.AddDocument(document);
         remaining--;
     }
     
     indexWriter.Commit();
     AssertNoPrx(directory);
     
     // Force a merge, then close the writer to flush.
     indexWriter.Optimize();
     indexWriter.Close();
     
     AssertNoPrx(directory);
     _TestUtil.CheckIndex(directory);
     directory.Close();
 }
		/// <summary>
		/// Indexes one document with body "blah the footest blah" and asserts that the
		/// AND-query "the footest", parsed by a MultiFieldQueryParser over the "body" field,
		/// matches exactly one document.
		/// </summary>
		public virtual void  TestStopWordSearching()
		{
			Analyzer standardAnalyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
			Directory directory = new RAMDirectory();
			
			// Build a one-document index in memory.
			var bodyDocument = new Document();
			bodyDocument.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
			var indexWriter = new IndexWriter(directory, standardAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			indexWriter.AddDocument(bodyDocument);
			indexWriter.Close();
			
			// Every term is required (AND).
			var multiFieldParser = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, new[] { "body" }, standardAnalyzer);
			multiFieldParser.DefaultOperator = QueryParser.Operator.AND;
			var parsedQuery = multiFieldParser.Parse("the footest");
			
			var indexSearcher = new IndexSearcher(directory, true);
			var scoreDocs = indexSearcher.Search(parsedQuery, null, 1000).ScoreDocs;
			Assert.AreEqual(1, scoreDocs.Length);
			indexSearcher.Close();
		}
		/// <summary>
		/// Checks how StandardAnalyzer tokenizes host names, with and without a trailing dot,
		/// for Version.LUCENE_CURRENT, Version.LUCENE_23 and Version.LUCENE_24.
		/// See https://issues.apache.org/jira/browse/LUCENE-1068 for the trailing-dot case.
		/// </summary>
		public virtual void  TestDomainNames()
		{
			// A fresh analyzer is built for every version instead of reusing a shared one,
			// because the shared one has its state altered (setReplaceInvalidAcronym).
			System.String[] hostToken = new System.String[]{"www.nutch.org"};
			System.String[] hostType = new System.String[]{"<HOST>"};
			
			// Current version: plain domain name, then the same name with a trailing dot,
			// which must still be recognized as HOST.
			StandardAnalyzer currentAnalyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
			AssertAnalyzesTo(currentAnalyzer, "www.nutch.org", new System.String[]{"www.nutch.org"});
			AssertAnalyzesTo(currentAnalyzer, "www.nutch.org.", hostToken, hostType);
			
			// 2.3 is expected to show the bug: the name comes out as an acronym.
			StandardAnalyzer analyzer23 = new StandardAnalyzer(Version.LUCENE_23);
			AssertAnalyzesTo(analyzer23, "www.nutch.org.", new System.String[]{"wwwnutchorg"}, new System.String[]{"<ACRONYM>"});
			
			// 2.4 is expected not to show the bug.
			StandardAnalyzer analyzer24 = new StandardAnalyzer(Version.LUCENE_24);
			AssertAnalyzesTo(analyzer24, "www.nutch.org.", new System.String[]{"www.nutch.org"}, new System.String[]{"<HOST>"});
		}
Example #31
0
		/// <summary>
		/// Command-line search demo: opens a Lucene index, reads queries from a file
		/// (<c>-queries</c>) or interactively from standard input, and prints the matching
		/// documents ten per page.
		/// </summary>
		/// <param name="args">
		/// Optional switches: <c>-index dir</c>, <c>-field f</c>, <c>-repeat n</c>,
		/// <c>-queries file</c>, <c>-raw</c>, <c>-norms field</c>; <c>-h</c>/<c>-help</c> as the
		/// first argument prints the usage line and exits.
		/// </param>
		public static void  Main(System.String[] args)
		{
			System.String usage = "Usage: " + typeof(SearchFiles) + " [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field]";
			if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
			{
				System.Console.Out.WriteLine(usage);
				System.Environment.Exit(0);
			}
			
			System.String index = "index";
			System.String field = "contents";
			System.String queries = null;
			int repeat = 0;
			bool raw = false;
			System.String normsField = null;
			
			for (int i = 0; i < args.Length; i++)
			{
				if ("-index".Equals(args[i]))
				{
					index = args[i + 1];
					i++;
				}
				else if ("-field".Equals(args[i]))
				{
					field = args[i + 1];
					i++;
				}
				else if ("-queries".Equals(args[i]))
				{
					queries = args[i + 1];
					i++;
				}
				else if ("-repeat".Equals(args[i]))
				{
					repeat = System.Int32.Parse(args[i + 1]);
					i++;
				}
				else if ("-raw".Equals(args[i]))
				{
					raw = true;
				}
				else if ("-norms".Equals(args[i]))
				{
					normsField = args[i + 1];
					i++;
				}
			}
			
			IndexReader reader = IndexReader.Open(index);
			Searcher searcher = null;
			System.IO.StreamReader in_Renamed = null;
			try
			{
				if (normsField != null)
					reader = new OneNormsReader(reader, normsField);
				
				searcher = new IndexSearcher(reader);
				Analyzer analyzer = new StandardAnalyzer();
				
				// Open each input source exactly once. The previous nested StreamReader
				// construction opened the file (or stdin) twice and leaked one of the handles.
				if (queries != null)
				{
					in_Renamed = new System.IO.StreamReader(queries, System.Text.Encoding.Default);
				}
				else
				{
					in_Renamed = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8"));
				}
				QueryParser parser = new QueryParser(field, analyzer);
				while (true)
				{
					if (queries == null)
						// prompt the user
						System.Console.Out.Write("Query: ");
					
					System.String line = in_Renamed.ReadLine();
					
					// End of input or an empty line ends the session.
					if (line == null || line.Length == 0)
						break;
					
					Query query = parser.Parse(line);
					System.Console.Out.WriteLine("Searching for: " + query.ToString(field));
					
					Hits hits = searcher.Search(query);
					
					if (repeat > 0)
					{
						// repeat & time as benchmark
						// Measure real elapsed time. Subtracting DateTime.Millisecond values only
						// compares the 0-999 millisecond components, so it is wrong (and can be
						// negative) whenever the run crosses a second boundary.
						System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
						for (int i = 0; i < repeat; i++)
						{
							hits = searcher.Search(query);
						}
						timer.Stop();
						System.Console.Out.WriteLine("Time: " + timer.ElapsedMilliseconds + "ms");
					}
					
					System.Console.Out.WriteLine(hits.Length() + " total matching documents");
					
					int HITS_PER_PAGE = 10;
					for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
					{
						int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
						for (int i = start; i < end; i++)
						{
							
							if (raw)
							{
								// output raw format
								System.Console.Out.WriteLine("doc=" + hits.Id(i) + " score=" + hits.Score(i));
								continue;
							}
							
							Document doc = hits.Doc(i);
							System.String path = doc.Get("path");
							if (path != null)
							{
								System.Console.Out.WriteLine((i + 1) + ". " + path);
								System.String title = doc.Get("title");
								if (title != null)
								{
									System.Console.Out.WriteLine("   Title: " + doc.Get("title"));
								}
							}
							else
							{
								System.Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
							}
						}
						
						if (queries != null)
						// non-interactive
							break;
						
						if (hits.Length() > end)
						{
							System.Console.Out.Write("more (y/n) ? ");
							line = in_Renamed.ReadLine();
							// ReadLine returns null at end of input; treat that like "no".
							if (line == null || line.Length == 0 || line[0] == 'n')
								break;
						}
					}
				}
			}
			finally
			{
				// Release everything even when parsing or searching throws.
				if (in_Renamed != null)
					in_Renamed.Close();
				if (searcher != null)
					searcher.Close();
				reader.Close();
			}
		}
Example #32
0
 /// <summary>
 /// Builds the analysis chain for <paramref name="reader"/>: the token stream of a new
 /// StandardAnalyzer wrapped, in order, by HamzaFilter, DiacriticFilter and a StopFilter
 /// that uses <c>_stopWords</c>.
 /// </summary>
 /// <param name="field">Not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The outermost filter of the chain (the StopFilter).</returns>
 public override TokenStream TokenStream(string field, TextReader reader)
 {
     // A new StandardAnalyzer is created on every call, as before.
     Lucene.Net.Analysis.Standard.StandardAnalyzer standardAnalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
     TokenStream standardTokens = standardAnalyzer.TokenStream(reader);

     // Locals keep the concrete filter types so constructor overload resolution is unchanged.
     HamzaFilter hamzaFiltered = new HamzaFilter(standardTokens);
     DiacriticFilter diacriticFiltered = new DiacriticFilter(hamzaFiltered);
     return new StopFilter(diacriticFiltered, _stopWords);
 }
        /// <summary>
        /// Fully consumes, ends and closes a reusable token stream for the text "t", then
        /// asserts via AssertAnalyzesToReuse that the same analyzer still yields the single
        /// token "t" for that text.
        /// </summary>
        public void Test_LUCENE_3042_LUCENENET_433()
        {
            const String input = "t";
            Analyzer standardAnalyzer = new StandardAnalyzer(_TestUtil.CurrentVersion);

            // First pass: drain the reusable stream completely, then end and close it.
            TokenStream reusable = standardAnalyzer.ReusableTokenStream("dummy", new System.IO.StringReader(input));
            reusable.Reset();
            bool hasToken = reusable.IncrementToken();
            while (hasToken)
            {
                // Tokens are only consumed, not inspected.
                hasToken = reusable.IncrementToken();
            }
            reusable.End();
            reusable.Close();

            // Second pass over the same analyzer.
            AssertAnalyzesToReuse(standardAnalyzer, input, new String[] { "t" });
        }
Example #34
0
        /// <summary>
        /// Looks up the best matching <see cref="City"/> for a token in the city index at
        /// <c>ApplicationSettings.CityIndexDirectory</c>.
        /// </summary>
        /// <param name="cityToken">Token whose possible city details are tried in order.</param>
        /// <returns>
        /// For the first possible detail that produces hits: a city whose country name equals
        /// the searched name, else a city whose name or ASCII name equals it (and whose country
        /// matches when one was given), else the first result of the population-then-score
        /// sort. <c>null</c> when no detail produces any hit.
        /// </returns>
        public static City FindCity(CityToken cityToken)
        {
            var sort = new Sort(new[] { new SortField(CityFieldNames.Population, SortField.LONG, true), SortField.FIELD_SCORE });

            // The analyzer, index directory and searcher were previously opened on every call and
            // never released; the using blocks dispose them on every exit path.
            using (var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
            using (var indexDirectory = FSDirectory.Open(ApplicationSettings.CityIndexDirectory))
            using (var searcher = new IndexSearcher(indexDirectory, true))
            {
                var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, CityFieldNames.Name, analyzer);

                var possibleCityDetails = cityToken.GetPossibleCityDetails();
                foreach (var possibleCityDetail in possibleCityDetails)
                {
                    var topScoreDocCollector = TopFieldCollector.Create(sort, 5, true, false, false, false);
                    var countryCode = string.Empty;
                    if (!string.IsNullOrEmpty(possibleCityDetail.CountryName))
                    {
                        countryCode = CountryCodes.LookupCountryCode(possibleCityDetail.CountryName);
                        // A country name with no known code cannot be searched for; try the next detail.
                        if (string.IsNullOrEmpty(countryCode)) continue;
                    }
                    var queryText = GetQueryText(possibleCityDetail.CityName, countryCode, possibleCityDetail.AdministrativeDivisionName);

                    var query = queryParser.Parse(queryText);
                    searcher.Search(query, topScoreDocCollector);
                    var results = topScoreDocCollector.TopDocs().ScoreDocs;

                    if (topScoreDocCollector.TotalHits > 0)
                    {
                        // Materialize the cities now, while the searcher is still open.
                        var cities = results.Select(x => new City(searcher.Doc(x.Doc))).ToList();

                        // if the name being searched for matches a country return it
                        foreach (var city in cities)
                        {
                            var countryNameMatches = String.Compare(possibleCityDetail.CityName, city.CountryName, CultureInfo.InvariantCulture, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase) == 0;
                            if (countryNameMatches)
                                return city;
                        }

                        // if the name matches then return it first
                        foreach (var city in cities)
                        {
                            var cityNameMatches = String.Compare(possibleCityDetail.CityName, city.Name, CultureInfo.InvariantCulture, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase) == 0;
                            var asciiNameMatches = String.Compare(possibleCityDetail.CityName, city.AsciiName, CultureInfo.InvariantCulture, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase) == 0;
                            var countryNameMatches = String.IsNullOrEmpty(possibleCityDetail.CountryName) || String.Compare(possibleCityDetail.CountryName, city.CountryName, CultureInfo.InvariantCulture, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase) == 0;
                            if ((cityNameMatches || asciiNameMatches) && countryNameMatches)
                                return city;
                        }

                        // if there were no direct city name or country name matches then just return the first result (sorted by population, then score)
                        var firstDocId = results[0].Doc;
                        var firstDocument = searcher.Doc(firstDocId);
                        return new City(firstDocument);
                    }
                }

                return null;
            }
        }
		/// <summary>
		/// Runs the text "www.nutch.org." (note the trailing dot) through a
		/// <c>StandardAnalyzer</c> constructed with <c>true</c> and hands the
		/// expected output "www.nutch.org" / "&lt;HOST&gt;" to
		/// <c>AssertAnalyzesTo</c>. A <c>NullReferenceException</c> raised while
		/// doing so is reported as a test failure rather than propagating.
		/// </summary>
		public virtual void  TestLucene1140()
		{
			System.String[] expectedTerms = new System.String[]{"www.nutch.org"};
			// presumably the token types checked by AssertAnalyzesTo (defined elsewhere) -- confirm
			System.String[] expectedTypes = new System.String[]{"<HOST>"};
			
			try
			{
				AssertAnalyzesTo(new StandardAnalyzer(true), "www.nutch.org.", expectedTerms, expectedTypes);
			}
			catch (System.NullReferenceException)
			{
				// the exception object itself is not needed, only the fact that it was thrown
				Assert.IsTrue(false, "Should not throw an NPE and it did");
			}
		}
        /// <summary>
        /// Builds a <c>MultiFieldQueryParser</c> over the fields "f1" and "f2" with a
        /// boost map that only contains "f1", parses the term "bazinga", and checks
        /// that the resulting query prints as "f1:bazinga^2.0 f2:bazinga" -- i.e. the
        /// field without a boost entry still takes part in the query.
        /// </summary>
        public virtual void TestParsingQueryWithoutBoosts()
        {
            const string expectedQueryText = "f1:bazinga^2.0 f2:bazinga";

            // Only f1 receives a boost; f2 is deliberately left out of the map.
            var fieldBoosts = new Dictionary<String, Single>();
            fieldBoosts.Add("f1", 2);

            var searchFields = new[] {"f1", "f2"};
            var standardAnalyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
            var multiFieldParser = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, searchFields, standardAnalyzer, fieldBoosts);

            var parsedQuery = multiFieldParser.Parse("bazinga");

            Assert.AreEqual(expectedQueryText, parsedQuery.ToString());
        }
Example #37
0
		/// <summary>
		/// Adds a set of titles to an in-memory index through <c>AddDoc</c>, then
		/// parses the query string "giga~0.9" against the field "field" and expects
		/// exactly one hit whose stored "field" value is "Giga byte".
		/// </summary>
		/// <remarks>
		/// <c>AddDoc</c> is defined elsewhere; it is presumed to store each title in
		/// the field named "field" -- confirm against its definition.
		/// </remarks>
		public virtual void  TestGiga()
		{
			StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
			
			Directory index = new MockRAMDirectory();
			IndexWriter w = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
			
			AddDoc("Lucene in Action", w);
			AddDoc("Lucene for Dummies", w);
			
			// addDoc("Giga", w);
			AddDoc("Giga byte", w);
			
			AddDoc("ManagingGigabytesManagingGigabyte", w);
			AddDoc("ManagingGigabytesManagingGigabytes", w);
			
			AddDoc("The Art of Computer Science", w);
			AddDoc("J. K. Rowling", w);
			AddDoc("JK Rowling", w);
			AddDoc("Joanne K Roling", w);
			AddDoc("Bruce Willis", w);
			AddDoc("Willis bruce", w);
			AddDoc("Brute willis", w);
			AddDoc("B. willis", w);
			
			// Obtain the reader from the writer before the writer is closed.
			IndexReader r = w.GetReader();
			w.Close();
			
			try
			{
				Query q = new QueryParser(Util.Version.LUCENE_CURRENT, "field", analyzer).Parse("giga~0.9");
				
				// 3. search
				IndexSearcher searcher = new IndexSearcher(r);
				ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
				Assert.AreEqual(1, hits.Length);
				// expected value goes first so a failure message reads the right way round
				Assert.AreEqual("Giga byte", searcher.Doc(hits[0].Doc).Get("field"));
			}
			finally
			{
				// release the reader even when one of the assertions above fails
				r.Close();
			}
		}
		/// <summary>
		/// Indexes a document containing a term that is one character longer than
		/// the 16383-character maximum noted below, followed by a normal document,
		/// and verifies that the ordinary terms, the position of the term after the
		/// over-long one, and the document count are all as expected. It then
		/// re-opens the index with a <c>StandardAnalyzer</c> whose maximum token
		/// length is raised to 100000 and checks that a term of exactly 16383
		/// characters can be indexed and looked up.
		/// </summary>
		public virtual void TestWickedLongTerm()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
			
			// A term made of 16383 'x' characters, built directly instead of
			// filling a char[] element by element.
			System.String bigTerm = new System.String('x', 16383);
			Document doc = new Document();
			
			// Max term length is 16383, so prefixing one more 'x' makes
			// these contents produce a too-long term:
			System.String contents = "abc xyz x" + bigTerm + " another term";
			doc.Add(new Field("content", contents, Field.Store.NO, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			// Make sure we can add another normal document
			doc = new Document();
			doc.Add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			writer.Close();
			
			IndexReader reader = IndexReader.Open(dir);
			
			// Make sure all terms < max size were indexed
			Assert.AreEqual(2, reader.DocFreq(new Term("content", "abc")));
			Assert.AreEqual(1, reader.DocFreq(new Term("content", "bbb")));
			Assert.AreEqual(1, reader.DocFreq(new Term("content", "term")));
			Assert.AreEqual(1, reader.DocFreq(new Term("content", "another")));
			
			// Make sure position is still incremented when
			// massive term is skipped:
			TermPositions tps = reader.TermPositions(new Term("content", "another"));
			Assert.IsTrue(tps.Next());
			Assert.AreEqual(1, tps.Freq());
			Assert.AreEqual(3, tps.NextPosition());
			
			// Make sure the doc that has the massive term is in
			// the index:
			Assert.AreEqual(2, reader.NumDocs(), "document with wicked long term is not in the index!");
			
			reader.Close();
			
			// Make sure we can add a document with exactly the
			// maximum length term, and search on that term:
			doc = new Document();
			doc.Add(new Field("content", bigTerm, Field.Store.NO, Field.Index.TOKENIZED));
			StandardAnalyzer sa = new StandardAnalyzer();
			// raise the analyzer's token length limit well above the term length
			sa.SetMaxTokenLength(100000);
			writer = new IndexWriter(dir, sa);
			writer.AddDocument(doc);
			writer.Close();
			reader = IndexReader.Open(dir);
			Assert.AreEqual(1, reader.DocFreq(new Term("content", bigTerm)));
			reader.Close();
			
			dir.Close();
		}
Example #39
0
		/// <summary>
		/// Interactive console search over the index opened from the path "index".
		/// Repeatedly prompts for a query, parses it against the "contents" field,
		/// prints the number of matching documents and lists them ten at a time,
		/// showing each document's "path" or, failing that, its "url" and "title".
		/// The loop ends at end of input or when a blank query is entered.
		/// </summary>
		/// <param name="args">Command-line arguments; not used.</param>
		/// <remarks>
		/// Any exception is caught and reported on the console rather than
		/// propagated. The searcher is closed on every exit path.
		/// </remarks>
		public static void  Main(System.String[] args)
		{
			try
			{
				Searcher searcher = new IndexSearcher(@"index");
				try
				{
					Analyzer analyzer = new StandardAnalyzer();
					
					// One reader over standard input using the default encoding.
					System.IO.StreamReader in_Renamed = new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default);
					const int HITS_PER_PAGE = 10;
					
					while (true)
					{
						System.Console.Out.Write("Query: ");
						System.String line = in_Renamed.ReadLine();
						
						// ReadLine returns null at end of input; a blank line also ends the session.
						if (line == null || line.Trim().Length == 0)
							break;
						
						Query query = QueryParser.Parse(line, "contents", analyzer);
						System.Console.Out.WriteLine("Searching for: " + query.ToString("contents"));
						
						Hits hits = searcher.Search(query);
						System.Console.Out.WriteLine(hits.Length() + " total matching documents");
						
						for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
						{
							int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
							for (int i = start; i < end; i++)
							{
								Document doc = hits.Doc(i);
								System.String path = doc.Get("path");
								if (path != null)
								{
									System.Console.Out.WriteLine(i + ". " + path);
								}
								else
								{
									System.String url = doc.Get("url");
									if (url != null)
									{
										System.Console.Out.WriteLine(i + ". " + url);
										System.Console.Out.WriteLine("   - " + doc.Get("title"));
									}
									else
									{
										System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document");
									}
								}
							}
							
							if (hits.Length() > end)
							{
								System.Console.Out.Write("more (y/n) ? ");
								line = in_Renamed.ReadLine();
								// Stop paging on end of input, an empty answer, or an answer starting with 'n'.
								if (line == null || line.Length == 0 || line[0] == 'n')
									break;
							}
						}
					}
				}
				finally
				{
					// Close the searcher even when parsing or searching throws.
					searcher.Close();
				}
			}
			catch (System.Exception e)
			{
				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
			}
		}
 /// <summary>
 /// Assigns <c>Analyzer</c> a new Lucene.Net standard analyzer created for the
 /// Lucene version read from <c>SearchSettings.Instance.LuceneVersion</c>.
 /// </summary>
 public StandardAnalyzer()
 {
     var luceneVersion = SearchSettings.Instance.LuceneVersion;
     Analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(luceneVersion);
 }