Beispiel #1
0
        /// <summary>
        /// Checks that <c>Skip</c> and <c>Take</c> on the search options page through scored results:
        /// skipping four hits and taking one must return the same entity as the fifth hit of the
        /// unpaged search, while <c>TotalHits</c> keeps reporting the full match count.
        /// </summary>
        public void SkipAndTakeWorkWhenSearchingUsingAContextProvider()
        {
            InitializeContext();
            LuceneIndexerOptions options = new LuceneIndexerOptions()
            {
                Path = "lucene"
            };
            var           memoryCache = new MemoryCache(new MemoryCacheOptions());
            var           directory   = FSDirectory.Open(options.Path);
            JieBaAnalyzer analyzer    = new JieBaAnalyzer(TokenizerMode.Search);
            SearchEngine<TestDbContext> searchProvider = new SearchEngine<TestDbContext>(_context, directory, analyzer, memoryCache);

            searchProvider.CreateIndex();
            try
            {
                SearchOptions searchOptions = new SearchOptions("John", "FirstName");

                var initialResults = searchProvider.ScoredSearch<User>(searchOptions);

                // Verify the counts before indexing into Results so that a short result set
                // fails with a readable assertion instead of an out-of-range exception.
                Assert.Equal(5, initialResults.TotalHits);
                Assert.Equal(5, initialResults.Results.Count);
                var lastId = initialResults.Results[4].Entity.Id;

                searchOptions.Skip = 4;
                searchOptions.Take = 1;
                var subResults = searchProvider.ScoredSearch<User>(searchOptions);

                Assert.Equal(5, subResults.TotalHits);
                Assert.Equal(1, subResults.Results.Count);
                Assert.Equal(lastId, subResults.Results.First().Entity.Id);
            }
            finally
            {
                // Always remove the on-disk index, even when an assertion fails, so a failing
                // run does not leave state behind for the next test.
                searchProvider.DeleteIndex();
            }
        }
Beispiel #2
0
        /// <summary>
        /// Initializes the index: wipes every existing document and re-adds one document per
        /// item returned by <c>BookList()</c>.
        /// </summary>
        /// <remarks>
        /// <c>Id</c> is written as a <see cref="StringField"/>; <c>Author</c>, <c>Name</c>,
        /// <c>FileName</c> and <c>Content</c> are written as <see cref="TextField"/>s analyzed with
        /// <see cref="JieBaAnalyzer"/> in default mode. All fields are stored so they can be read
        /// back from search hits.
        /// </remarks>
        public void InitIndex()
        {
            // Declared in this order so disposal runs writer -> directory -> analyzer.
            using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Default))
            using (FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_hostingEnvironment.ContentRootPath + "\\Lucene\\Index")))
            using (IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)))
            {
                indexWriter.DeleteAll();
                foreach (var item in BookList())
                {
                    Document doc = new Document();
                    doc.Add(new StringField("Id", item.Id, Field.Store.YES));
                    doc.Add(new TextField("Author", item.Author, Field.Store.YES));
                    doc.Add(new TextField("Name", item.Name, Field.Store.YES));
                    doc.Add(new TextField("FileName", item.FileName, Field.Store.YES));
                    doc.Add(new TextField("Content", item.Content, Field.Store.YES));
                    indexWriter.AddDocument(doc);
                }
            }

            // Background reading:
            //https://www.cnblogs.com/dacc123/p/8431369.html
            //https://www.cnblogs.com/jesen1315/p/11065331.html
        }
Beispiel #3
0
        /// <summary>
        /// Checks that an entity added through the search provider's context becomes searchable
        /// once <c>SaveChanges</c> has been called on the provider: a surname search that had no
        /// hits before the save must return exactly the new user afterwards.
        /// </summary>
        public void SaveChangesUpdatesEntitiesAddedToTheIndex()
        {
            InitializeContext();
            LuceneIndexerOptions options = new LuceneIndexerOptions()
            {
                Path = "lucene"
            };
            var           memoryCache = new MemoryCache(new MemoryCacheOptions());
            var           directory   = FSDirectory.Open(options.Path);
            JieBaAnalyzer analyzer    = new JieBaAnalyzer(TokenizerMode.Search);
            SearchEngine<TestDbContext> searchProvider = new SearchEngine<TestDbContext>(_context, directory, analyzer, memoryCache);

            searchProvider.CreateIndex();
            try
            {
                var newUser = new User()
                {
                    FirstName = "Duke",
                    Surname   = "Nukem",
                    Email     = "*****@*****.**",
                    JobTitle  = "Shooty Man"
                };
                var search = new SearchOptions("Nukem", "Surname");

                var initialResults = searchProvider.Search<User>(search);

                searchProvider.Context.Users.Add(newUser);
                searchProvider.SaveChanges();
                var newResults = searchProvider.Search<User>(search);

                Assert.Equal(0, initialResults.TotalHits);
                Assert.Equal(1, newResults.TotalHits);
                Assert.Equal(newUser.Id, newResults.Results[0].Id);
            }
            finally
            {
                // This test writes an extra document into the on-disk index; remove the index
                // afterwards, as the sibling tests do, so that state does not outlive the test.
                searchProvider.DeleteIndex();
            }
        }
Beispiel #4
0
        /// <summary>
        /// Checks that a search can be ordered by several fields at once ("Surname,JobTitle"):
        /// after adding a "John Chapman / Test Engineer" user, the first two "John" hits are
        /// expected to be the "Sales Associate" and then the "Test Engineer" entries.
        /// </summary>
        public void ASearchCanOrderByMultipleFields()
        {
            InitializeContext();
            LuceneIndexerOptions options = new LuceneIndexerOptions()
            {
                Path = "lucene"
            };
            var           memoryCache = new MemoryCache(new MemoryCacheOptions());
            var           directory   = FSDirectory.Open(options.Path);
            JieBaAnalyzer analyzer    = new JieBaAnalyzer(TokenizerMode.Search);
            SearchEngine<TestDbContext> searchProvider = new SearchEngine<TestDbContext>(_context, directory, analyzer, memoryCache);
            User jc = new User()
            {
                FirstName = "John",
                Surname   = "Chapman",
                JobTitle  = "Test Engineer",
                Email     = "*****@*****.**"
            };

            // The extra user is saved straight to the context before the index is built,
            // so CreateIndex picks it up together with the seeded data.
            _context.Users.Add(jc);
            _context.SaveChanges();
            searchProvider.CreateIndex();
            try
            {
                SearchOptions search = new SearchOptions("John", "FirstName", 1000, null, null, "Surname,JobTitle");

                var results = searchProvider.ScoredSearch<User>(search);

                // Guard the indexer accesses below so a short result set produces a clear failure.
                Assert.True(results.Results.Count >= 2, "Expected at least two results to compare ordering.");
                var topResult    = results.Results[0];
                var secondResult = results.Results[1];

                Assert.Equal("Sales Associate", topResult.Entity.JobTitle);
                Assert.Equal("Test Engineer", secondResult.Entity.JobTitle);
            }
            finally
            {
                // Remove the on-disk index even when an assertion fails.
                searchProvider.DeleteIndex();
            }
        }
Beispiel #5
0
        /// <summary>
        /// Checks that a scored search over "FirstName,Surname" returns hits ordered by score:
        /// the first hit must score higher than the last one and is expected to be "Jeremy Burns".
        /// </summary>
        public void AScoredSearchWillOrderByRelevence()
        {
            InitializeContext();
            LuceneIndexerOptions options = new LuceneIndexerOptions()
            {
                Path = "lucene"
            };
            var           memoryCache = new MemoryCache(new MemoryCacheOptions());
            var           directory   = FSDirectory.Open(options.Path);
            JieBaAnalyzer analyzer    = new JieBaAnalyzer(TokenizerMode.Search);
            SearchEngine<TestDbContext> searchProvider = new SearchEngine<TestDbContext>(_context, directory, analyzer, memoryCache);

            searchProvider.CreateIndex();
            try
            {
                SearchOptions searchOptions = new SearchOptions("Burns", "FirstName,Surname");

                var results = searchProvider.ScoredSearch<User>(searchOptions);

                // Fail with a clear message rather than an InvalidOperationException from First()/Last().
                Assert.NotEmpty(results.Results);
                var topHit  = results.Results.First();
                var first   = topHit.Entity;
                var highest = topHit.Score;
                var lowest  = results.Results.Last().Score;

                Assert.True(highest > lowest);
                Assert.Equal("Jeremy", first.FirstName);
                Assert.Equal("Burns", first.Surname);
            }
            finally
            {
                // Remove the on-disk index even when an assertion fails.
                searchProvider.DeleteIndex();
            }
        }
        /// <summary>
        /// Runs the base initialisation and then builds <c>highLighter</c> from a JieBa analyzer
        /// in search mode and an index searcher whose directory and cache are resolved from
        /// <c>host.Services</c>.
        /// </summary>
        public new void Init()
        {
            base.Init();

            var jieba          = new JieBaAnalyzer(JiebaNet.Segmenter.TokenizerMode.Search);
            var indexDirectory = host.Services.GetService<Directory>();
            var cache          = host.Services.GetService<ICache>();
            var indexSearcher  = new LuceneIndexSearcher(indexDirectory, jieba, cache);

            highLighter = new LuceneHighLighter(
                jieba,
                indexSearcher,
                150,
                200,
                ILuceneHighLighter.DEFAULT_PRETAG,
                ILuceneHighLighter.DEFAULT_POSTTAG);
        }
Beispiel #7
0
        /// <summary>
        /// Registers the JieBa analyzer with the Lucene options under the name "jiebaAnalyzer"
        /// and adds the scoped content display driver and search query service.
        /// </summary>
        /// <param name="services">The service collection to register into.</param>
        public override void ConfigureServices(IServiceCollection services)
        {
            // One analyzer instance is created up front and captured by the options callback.
            var searchModeAnalyzer = new JieBaAnalyzer(TokenizerMode.Search);

            services.Configure<OrchardCore.Lucene.LuceneOptions>(
                luceneOptions => luceneOptions.Analyzers.Add(new JiabaLunceAnalyzer("jiebaAnalyzer", searchModeAnalyzer)));

            services
                .AddScoped<IContentDisplayDriver, BraksnTermPartContentDriver>()
                .AddScoped<IBraksnSearchQueryService, BraksnSearchQueryService>();

            // Currently disabled registration:
            //services.AddSingleton<IMediaCreatingEventHandler, BraksnMediaCreateEvent>();
        }
Beispiel #8
0
        /// <summary>
        /// Builds the index from the initialized context and checks that at least one document
        /// was indexed.
        /// </summary>
        public void NonValidEntitiesAreIgnored()
        {
            InitializeContext();

            var indexerOptions = new LuceneIndexerOptions { Path = "lucene" };
            var cache          = new MemoryCache(new MemoryCacheOptions());
            var indexDirectory = FSDirectory.Open(indexerOptions.Path);
            var jieba          = new JieBaAnalyzer(TokenizerMode.Search);
            var engine         = new SearchEngine<TestDbContext>(_context, indexDirectory, jieba, cache);

            engine.CreateIndex();

            bool indexHasDocuments = engine.IndexCount > 0;
            Assert.True(indexHasDocuments);
        }
Beispiel #9
0
        /// <summary>
        /// Converts search hits into <c>Book</c> items whose <c>Author</c>, <c>Name</c>,
        /// <c>FileName</c> and <c>Content</c> carry the best highlighted fragment for
        /// <paramref name="query"/>, falling back to the stored value when nothing is highlighted.
        /// </summary>
        /// <param name="searcher">Searcher used to load the stored document of each hit.</param>
        /// <param name="docs">Hits to convert; <c>TotalHits</c> is reported as the total count.</param>
        /// <param name="query">Query whose matching terms are wrapped in a red <c>span</c>.</param>
        /// <returns>An anonymous object with <c>Items</c> (list of books) and <c>TotalCount</c>.</returns>
        private object GetResultData(IndexSearcher searcher, TopDocs docs, Query query)
        {
            SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
            Highlighter         highlighter         = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));

            // Limit each highlighted fragment to roughly 150 characters.
            highlighter.TextFragmenter = new SimpleFragmenter(150);

            var result = new List<Book>(docs.ScoreDocs.Length);

            // The analyzer is only needed while highlighting, so release it when done.
            using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Search))
            {
                foreach (ScoreDoc sd in docs.ScoreDocs)
                {
                    Document doc = searcher.Doc(sd.Doc);

                    result.Add(new Book()
                    {
                        Id       = doc.Get("Id"),
                        Author   = HighlightOrStored(highlighter, analyzer, doc, "Author"),
                        Name     = HighlightOrStored(highlighter, analyzer, doc, "Name"),
                        FileName = HighlightOrStored(highlighter, analyzer, doc, "FileName"),
                        Content  = HighlightOrStored(highlighter, analyzer, doc, "Content")
                    });
                }
            }

            return(new { Items = result, TotalCount = docs.TotalHits });
        }

        /// <summary>
        /// Returns the best highlighted fragment of a stored field, or the stored value itself
        /// when the field is missing/empty or the highlighter produces no fragment.
        /// </summary>
        private static string HighlightOrStored(Highlighter highlighter, Analyzer analyzer, Document doc, string fieldName)
        {
            string stored = doc.Get(fieldName);
            if (string.IsNullOrEmpty(stored))
            {
                // Nothing to highlight; also avoids handing a null text to the highlighter.
                return stored;
            }

            string fragment = highlighter.GetBestFragment(analyzer, fieldName, stored);
            return string.IsNullOrWhiteSpace(fragment) ? stored : fragment;
        }
Beispiel #10
0
        /// <summary>
        /// Checks that building the index from the initialized context yields exactly 2000
        /// indexed documents.
        /// </summary>
        public void AContextProviderCanIndexADatabase()
        {
            InitializeContext();
            LuceneIndexerOptions options = new LuceneIndexerOptions()
            {
                Path = "lucene"
            };
            var           memoryCache = new MemoryCache(new MemoryCacheOptions());
            var           directory   = FSDirectory.Open(options.Path);
            JieBaAnalyzer analyzer    = new JieBaAnalyzer(TokenizerMode.Search);
            SearchEngine<TestDbContext> searchProvider = new SearchEngine<TestDbContext>(_context, directory, analyzer, memoryCache);

            searchProvider.CreateIndex();
            try
            {
                Assert.Equal(2000, searchProvider.IndexCount);
            }
            finally
            {
                // Remove the on-disk index even when the assertion fails.
                searchProvider.DeleteIndex();
            }
        }
Beispiel #11
0
        /// <summary>
        /// Searches the given fields of the on-disk index for <paramref name="keyword"/> and
        /// returns the highlighted hits.
        /// </summary>
        /// <param name="field">Names of the index fields to search.</param>
        /// <param name="keyword">Query text, parsed with the Lucene query parser syntax.</param>
        /// <returns>
        /// An anonymous object with <c>Items</c> and <c>TotalCount</c>; an empty result when
        /// <paramref name="keyword"/> is null or whitespace.
        /// </returns>
        public object ShowFields(string[] field, string keyword)
        {
            // Upper bound on the number of hits fetched from the index.
            const int maxHits = 1000;

            if (string.IsNullOrWhiteSpace(keyword))
            {
                return(new { Items = new List<Book>(), TotalCount = 0 });
            }

            // Directory, reader and analyzer are all disposable; release them once the result is built.
            using (FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_hostingEnvironment.ContentRootPath + "\\Lucene\\Index")))
            using (IndexReader reader = DirectoryReader.Open(directory))
            using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Default))
            {
                IndexSearcher searcher = new IndexSearcher(reader);
                QueryParser   parser   = new MultiFieldQueryParser(LuceneVersion.LUCENE_48, field, analyzer);
                Query         query    = parser.Parse(keyword);
                TopDocs       docs     = searcher.Search(query, null, maxHits);

                // Must run while the reader is still open: stored fields are loaded per hit.
                return(GetResultData(searcher, docs, query));
            }
        }
Beispiel #12
0
        /// <summary>
        /// Checks that a scored search for "John" in <c>FirstName</c> over the indexed context
        /// reports five total hits.
        /// </summary>
        public void AContextCanBeSearchedUsingAContextProvider()
        {
            InitializeContext();

            LuceneIndexerOptions options = new LuceneIndexerOptions()
            {
                Path = "lucene"
            };
            var           memoryCache = new MemoryCache(new MemoryCacheOptions());
            var           directory   = FSDirectory.Open(options.Path);
            JieBaAnalyzer analyzer    = new JieBaAnalyzer(TokenizerMode.Search);
            SearchEngine<TestDbContext> searchProvider = new SearchEngine<TestDbContext>(_context, directory, analyzer, memoryCache);

            searchProvider.CreateIndex();
            try
            {
                SearchOptions searchOptions = new SearchOptions("John", "FirstName");

                var results = searchProvider.ScoredSearch<User>(searchOptions);

                Assert.Equal(5, results.TotalHits);
            }
            finally
            {
                // Remove the on-disk index even when the assertion fails.
                searchProvider.DeleteIndex();
            }
        }
Beispiel #13
0
        /// <summary>
        /// Runs an advanced search that combines one parsed query per input entry into a single
        /// boolean query, using each entry's <c>Occur</c> to decide how the clause participates.
        /// </summary>
        /// <param name="input">
        /// Field/keyword/occur triples; entries with a null or whitespace keyword are skipped.
        /// </param>
        /// <returns>
        /// An anonymous object with <c>Items</c> and <c>TotalCount</c>; an empty result when
        /// <paramref name="input"/> is null.
        /// </returns>
        public object ShowAdvanced(IEnumerable<MultiFieldInput> input)
        {
            // Upper bound on the number of hits fetched from the index.
            const int maxHits = 1000;

            if (input == null)
            {
                // Same shape as the empty result of ShowFields, instead of a NullReferenceException.
                return(new { Items = new List<Book>(), TotalCount = 0 });
            }

            // Directory, reader and analyzer are all disposable; release them once the result is built.
            using (FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_hostingEnvironment.ContentRootPath + "\\Lucene\\Index")))
            using (IndexReader reader = DirectoryReader.Open(directory))
            using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Default))
            {
                IndexSearcher searcher = new IndexSearcher(reader);
                BooleanQuery  bq       = new BooleanQuery();
                foreach (var item in input)
                {
                    if (string.IsNullOrWhiteSpace(item.Keyword))
                    {
                        continue;
                    }
                    QueryParser parser = new QueryParser(LuceneVersion.LUCENE_48, item.Field, analyzer);
                    Query       query  = parser.Parse(item.Keyword);
                    bq.Add(query, item.Occur);
                }

                TopDocs docs = searcher.Search(bq, null, maxHits);

                // Must run while the reader is still open: stored fields are loaded per hit.
                return(GetResultData(searcher, docs, bq));
            }
        }
Beispiel #14
0
        /// <summary>
        /// Console demo: indexes one sample document with the JieBa analyzer, runs an interactive
        /// fuzzy search with highlighting over the "content" field, and finally prints a series
        /// of JieBa segmentation examples.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            using (var directory = FSDirectory.Open(new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory + "Lucene")))
            {
                IndexSampleDocument(directory);
                RunSearchLoop(directory);
            }

            RunSegmenterDemo();
            Console.Read();
        }

        /// <summary>
        /// Adds the sample document (fields "id", "soc", "shot", "content") to the index and commits it.
        /// </summary>
        private static void IndexSampleDocument(FSDirectory directory)
        {
            using (var indexAnalyzer = new JieBaAnalyzer(TokenizerMode.Default))
            using (var indexWriter = new IndexWriter(directory, new IndexWriterConfig(LuceneVersion.LUCENE_48, indexAnalyzer)))
            {
                var fieldList = new List<Field>
                {
                    new TextField("id", "22", Field.Store.YES),
                    new TextField("soc", "呵呵", Field.Store.YES),
                    new TextField("shot", "内容分类标准以及为读者提供的任何信息", Field.Store.YES),
                    new TextField("content", "《人民日报》(电子版)的一切内容(包括但不限于文字、图片、PDF、图表、标志、标识、商标、版面设计、专栏目录与名称、内容分类标准以及为读者提供的任何信息)仅供人民网读者阅读、学习研究使用,未经人民网股份有限公司及/或相关权利人书面授权,任何单位及个人不得将《人民日报》(电子版)所登载、发布的内容用于商业性目的,包括但不限于转载、复制、发行、制作光盘、数据库、触摸展示等行为方式,或将之在非本站所属的服务器上作镜像。否则,人民网股份有限公司将采取包括但不限于网上公示、向有关部门举报、诉讼等一切合法手段,追究侵权者的法律责任。《人民日报》(电子版)的一切内容(包括但不限于文字、图片、PDF、图表、标志、标识、商标、版面设计、专栏目录与名称、内容分类标准以及为读者提供的任何信息)仅供人民网读者阅读、学习研究使用,未经人民网股份有限公司及/或相关权利人书面授权,任何单位及个人不得将《人民日报》(电子版)所登载、发布的内容用于商业性目的,包括但不限于转载、复制、发行、制作光盘、数据库、触摸展示等行为方式,或将之在非本站所属的服务器上作镜像。否则,人民网股份有限公司将采取包括但不限于网上公示、向有关部门举报、诉讼等一切合法手段,追究侵权者的法律责任。", Field.Store.YES)
                };

                indexWriter.AddDocument(fieldList);
                indexWriter.Commit();
            }
        }

        /// <summary>
        /// Reads query terms from the console and prints highlighted fuzzy matches on the
        /// "content" field. The loop ends on end-of-input or a blank line.
        /// </summary>
        private static void RunSearchLoop(FSDirectory directory)
        {
            using (var analyzer = new JieBaAnalyzer(TokenizerMode.Search))
            {
                string queryK;

                // A null (end of input) or blank line ends the loop, so the segmentation demo
                // that follows in Main is reachable and a null term is never built.
                while (!string.IsNullOrWhiteSpace(queryK = Console.ReadLine()))
                {
                    // Open a reader per query and release it when the query is done.
                    using (var directoryReader = DirectoryReader.Open(directory))
                    {
                        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

                        var fuzzy = new FuzzyQuery(new Term("content", queryK));

                        // Fetch the top 100 hits.
                        TopDocs topDocs = indexSearcher.Search(fuzzy, 100);
                        Console.WriteLine("找到: " + topDocs.TotalHits);

                        // Custom highlight markup ("background" was misspelled, which made the style a no-op).
                        QueryScorer         scorer        = new QueryScorer(fuzzy, "content");
                        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span style=\"background:red\">", "</span>");
                        Highlighter         highlighter   = new Highlighter(htmlFormatter, scorer);

                        foreach (var doc in topDocs.ScoreDocs)
                        {
                            var returnDoc    = indexSearcher.Doc(doc.Doc);
                            var resultHiligh = highlighter.GetBestFragments(analyzer, "content", returnDoc.Get("content"), 3);
                            Console.WriteLine(string.Join("", resultHiligh));
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Prints JieBa segmentation results for a set of sample sentences and a custom-dictionary edit.
        /// </summary>
        private static void RunSegmenterDemo()
        {
            var segmenter = new JiebaSegmenter();
            var segments  = segmenter.Cut("我来到北京清华大学", cutAll: true);

            Console.WriteLine("【全模式】:{0}", string.Join("/ ", segments));

            segments = segmenter.Cut("我来到北京清华大学");  // accurate mode is the default
            Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));

            segments = segmenter.Cut("他来到了网易杭研大厦");  // accurate mode by default, with the HMM model enabled
            Console.WriteLine("【新词识别】:{0}", string.Join("/ ", segments));

            segments = segmenter.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造"); // search-engine mode
            Console.WriteLine("【搜索引擎模式】:{0}", string.Join("/ ", segments));

            segments = segmenter.Cut("结过婚的和尚未结过婚的");
            Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));

            segments = segmenter.Cut("北京大学生喝进口红酒");
            Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));

            segments = segmenter.Cut("在北京大学生活区喝进口红酒");
            Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));

            segments = segmenter.Cut("腾讯视频致力于打造中国最大的在线视频媒体平台,以丰富的内容、极致的观看体验");
            Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));

            // Remove and re-add a dictionary word before segmenting an address.
            segmenter.DeleteWord("湖南");
            segmenter.AddWord("湖南");
            segments = segmenter.Cut("湖南长沙市天心区");
            Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));
        }