/// <summary>
/// Checks paging on a scored search: after an unpaged search for "John" in FirstName
/// yields five hits, repeating it with Skip = 4 and Take = 1 must still report five
/// total hits but return exactly one result, namely the fifth entity of the first search.
/// </summary>
public void SkipAndTakeWorkWhenSearchingUsingAContextProvider()
{
    InitializeContext();

    // Build the engine on top of the on-disk "lucene" index directory.
    var indexerOptions = new LuceneIndexerOptions { Path = "lucene" };
    var cache = new MemoryCache(new MemoryCacheOptions());
    var indexDirectory = FSDirectory.Open(indexerOptions.Path);
    var jiebaAnalyzer = new JieBaAnalyzer(TokenizerMode.Search);
    var engine = new SearchEngine<TestDbContext>(_context, indexDirectory, jiebaAnalyzer, cache);
    engine.CreateIndex();

    // Unpaged search: remember the id of the fifth hit.
    var query = new SearchOptions("John", "FirstName");
    var allHits = engine.ScoredSearch<User>(query);
    var expectedId = allHits.Results[4].Entity.Id;

    Assert.Equal(5, allHits.TotalHits);
    Assert.Equal(5, allHits.Results.Count);

    // Paged search: skip the first four hits and take a single one.
    query.Skip = 4;
    query.Take = 1;
    var pagedHits = engine.ScoredSearch<User>(query);

    Assert.Equal(5, pagedHits.TotalHits);
    Assert.Equal(1, pagedHits.Results.Count);
    Assert.Equal(expectedId, pagedHits.Results.First().Entity.Id);

    engine.DeleteIndex();
}
/// <summary>
/// Initializes the index: wipes every document currently in the index under
/// "&lt;ContentRoot&gt;\Lucene\Index" and re-adds one document per entry of <c>BookList()</c>.
/// </summary>
/// <remarks>
/// "Id" is indexed as a <c>StringField</c>; "Author", "Name", "FileName" and "Content"
/// are indexed as <c>TextField</c>s. All fields are stored.
/// The analyzer, the directory and the writer are all disposed when the method returns.
/// References kept from the original source:
/// https://www.cnblogs.com/dacc123/p/8431369.html
/// https://www.cnblogs.com/jesen1315/p/11065331.html
/// </remarks>
public void InitIndex()
{
    // Analyzer and FSDirectory are IDisposable; the original never released them.
    using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Default))
    using (FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_hostingEnvironment.ContentRootPath + "\\Lucene\\Index")))
    {
        IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
        using (IndexWriter writer = new IndexWriter(directory, writerConfig))
        {
            // Start from an empty index so that re-running does not duplicate documents.
            writer.DeleteAll();

            foreach (var item in BookList())
            {
                Document doc = new Document();
                doc.Add(new StringField("Id", item.Id, Field.Store.YES));
                doc.Add(new TextField("Author", item.Author, Field.Store.YES));
                doc.Add(new TextField("Name", item.Name, Field.Store.YES));
                doc.Add(new TextField("FileName", item.FileName, Field.Store.YES));
                doc.Add(new TextField("Content", item.Content, Field.Store.YES));
                writer.AddDocument(doc);
            }
        }
    }
}
/// <summary>
/// Checks that an entity added through the search engine's context becomes searchable
/// after <c>SaveChanges()</c> is called on the engine: a search for "Nukem" in Surname
/// has no hits before the user is added and exactly one hit (the new user) afterwards.
/// </summary>
public void SaveChangesUpdatesEntitiesAddedToTheIndex()
{
    InitializeContext();
    LuceneIndexerOptions options = new LuceneIndexerOptions() { Path = "lucene" };
    var memoryCache = new MemoryCache(new MemoryCacheOptions());
    var directory = FSDirectory.Open(options.Path);
    JieBaAnalyzer analyzer = new JieBaAnalyzer(TokenizerMode.Search);
    SearchEngine<TestDbContext> searchProvider = new SearchEngine<TestDbContext>(_context, directory, analyzer, memoryCache);
    searchProvider.CreateIndex();

    var newUser = new User()
    {
        FirstName = "Duke",
        Surname = "Nukem",
        Email = "*****@*****.**",
        JobTitle = "Shooty Man"
    };
    var search = new SearchOptions("Nukem", "Surname");

    // Search once before and once after the new user is saved.
    var initialResults = searchProvider.Search<User>(search);
    searchProvider.Context.Users.Add(newUser);
    searchProvider.SaveChanges();
    var newResults = searchProvider.Search<User>(search);

    Assert.Equal(0, initialResults.TotalHits);
    Assert.Equal(1, newResults.TotalHits);
    Assert.Equal(newUser.Id, newResults.Results[0].Id);

    // Clean up like the other tests that use the "lucene" path, so the extra user
    // does not stay behind in the on-disk index.
    searchProvider.DeleteIndex();
}
/// <summary>
/// Checks ordering by several fields: with an extra "John Chapman / Test Engineer" user
/// in the context, a scored search for "John" in FirstName ordered by "Surname,JobTitle"
/// must return a "Sales Associate" first and the "Test Engineer" second.
/// </summary>
public void ASearchCanOrderByMultipleFields()
{
    InitializeContext();

    var indexerOptions = new LuceneIndexerOptions { Path = "lucene" };
    var cache = new MemoryCache(new MemoryCacheOptions());
    var indexDirectory = FSDirectory.Open(indexerOptions.Path);
    var jiebaAnalyzer = new JieBaAnalyzer(TokenizerMode.Search);
    var engine = new SearchEngine<TestDbContext>(_context, indexDirectory, jiebaAnalyzer, cache);

    // Persist the additional user before the index is built so that it gets indexed.
    var johnChapman = new User
    {
        FirstName = "John",
        Surname = "Chapman",
        JobTitle = "Test Engineer",
        Email = "*****@*****.**"
    };
    _context.Users.Add(johnChapman);
    _context.SaveChanges();
    engine.CreateIndex();

    var orderedQuery = new SearchOptions("John", "FirstName", 1000, null, null, "Surname,JobTitle");
    var hits = engine.ScoredSearch<User>(orderedQuery);
    var first = hits.Results[0];
    var second = hits.Results[1];

    Assert.Equal("Sales Associate", first.Entity.JobTitle);
    Assert.Equal("Test Engineer", second.Entity.JobTitle);

    engine.DeleteIndex();
}
/// <summary>
/// Checks that a scored search for "Burns" over "FirstName,Surname" returns its results
/// with the first score strictly higher than the last one, and that the first result
/// is the user "Jeremy Burns".
/// </summary>
public void AScoredSearchWillOrderByRelevence()
{
    InitializeContext();

    var indexerOptions = new LuceneIndexerOptions { Path = "lucene" };
    var cache = new MemoryCache(new MemoryCacheOptions());
    var indexDirectory = FSDirectory.Open(indexerOptions.Path);
    var jiebaAnalyzer = new JieBaAnalyzer(TokenizerMode.Search);
    var engine = new SearchEngine<TestDbContext>(_context, indexDirectory, jiebaAnalyzer, cache);
    engine.CreateIndex();

    var query = new SearchOptions("Burns", "FirstName,Surname");
    var hits = engine.ScoredSearch<User>(query);

    var topEntity = hits.Results.First().Entity;
    var topScore = hits.Results.First().Score;
    var bottomScore = hits.Results.Last().Score;

    Assert.True(topScore > bottomScore);
    Assert.Equal("Jeremy", topEntity.FirstName);
    Assert.Equal("Burns", topEntity.Surname);

    engine.DeleteIndex();
}
/// <summary>
/// Hides the base <c>Init</c>: runs it first, then builds a <c>LuceneIndexSearcher</c>
/// from the <c>Directory</c> and <c>ICache</c> services resolved from the host and
/// stores a <c>LuceneHighLighter</c> using the default pre/post tags in <c>highLighter</c>.
/// </summary>
public new void Init()
{
    base.Init();

    var jiebaAnalyzer = new JieBaAnalyzer(JiebaNet.Segmenter.TokenizerMode.Search);

    // Resolve the searcher's dependencies from the host's service provider.
    var indexDirectory = host.Services.GetService<Directory>();
    var cache = host.Services.GetService<ICache>();
    var indexSearcher = new LuceneIndexSearcher(indexDirectory, jiebaAnalyzer, cache);

    highLighter = new LuceneHighLighter(
        jiebaAnalyzer,
        indexSearcher,
        150,
        200,
        ILuceneHighLighter.DEFAULT_PRETAG,
        ILuceneHighLighter.DEFAULT_POSTTAG);
}
/// <summary>
/// Registers this module's services: adds a JieBa-based analyzer named "jiebaAnalyzer"
/// to the OrchardCore Lucene options and registers the content display driver and the
/// search query service with scoped lifetime.
/// </summary>
/// <param name="services">The service collection to add the registrations to.</param>
public override void ConfigureServices(IServiceCollection services)
{
    // One analyzer instance is created up front and captured by the options callback.
    var analyzer = new JieBaAnalyzer(TokenizerMode.Search);

    services.Configure<OrchardCore.Lucene.LuceneOptions>(
        luceneOptions => luceneOptions.Analyzers.Add(new JiabaLunceAnalyzer("jiebaAnalyzer", analyzer)));

    services
        .AddScoped<IContentDisplayDriver, BraksnTermPartContentDriver>()
        .AddScoped<IBraksnSearchQueryService, BraksnSearchQueryService>();

    // Currently not registered (was commented out in the original):
    //services.AddSingleton<IMediaCreatingEventHandler, BraksnMediaCreateEvent>();
}
/// <summary>
/// Builds the index over the test context and checks that it contains at least one
/// document afterwards.
/// </summary>
public void NonValidEntitiesAreIgnored()
{
    InitializeContext();
    LuceneIndexerOptions options = new LuceneIndexerOptions() { Path = "lucene" };
    var memoryCache = new MemoryCache(new MemoryCacheOptions());
    var directory = FSDirectory.Open(options.Path);
    JieBaAnalyzer analyzer = new JieBaAnalyzer(TokenizerMode.Search);
    SearchEngine<TestDbContext> searchProvider = new SearchEngine<TestDbContext>(_context, directory, analyzer, memoryCache);
    searchProvider.CreateIndex();

    Assert.True(searchProvider.IndexCount > 0);

    // Clean up like the other tests that use the "lucene" path, so the index does not
    // stay behind on disk.
    searchProvider.DeleteIndex();
}
/// <summary>
/// Converts the hits in <paramref name="docs"/> into <c>Book</c> objects whose
/// "Author", "Name", "FileName" and "Content" values carry the query matches wrapped in
/// a red <c>&lt;span&gt;</c>; a field without a highlighted fragment keeps its stored value.
/// </summary>
/// <param name="searcher">Searcher used to load the stored document of each hit.</param>
/// <param name="docs">Hits returned by the search.</param>
/// <param name="query">Query that produced the hits; it drives the highlighting.</param>
/// <returns>
/// An anonymous object with <c>Items</c> (the list of books) and <c>TotalCount</c>
/// (<c>docs.TotalHits</c>).
/// </returns>
private object GetResultData(IndexSearcher searcher, TopDocs docs, Query query)
{
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.TextFragmenter = new SimpleFragmenter(150);

    var result = new List<Book>();

    // The analyzer is IDisposable and only needed while the fragments are computed.
    using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Search))
    {
        foreach (ScoreDoc sd in docs.ScoreDocs)
        {
            Document doc = searcher.Doc(sd.Doc);
            result.Add(new Book()
            {
                Id = doc.Get("Id"),
                Author = HighlightOrStoredValue(highlighter, analyzer, doc, "Author"),
                Name = HighlightOrStoredValue(highlighter, analyzer, doc, "Name"),
                FileName = HighlightOrStoredValue(highlighter, analyzer, doc, "FileName"),
                Content = HighlightOrStoredValue(highlighter, analyzer, doc, "Content")
            });
        }
    }

    return new { Items = result, TotalCount = docs.TotalHits };
}

/// <summary>
/// Returns the best highlighted fragment of <paramref name="fieldName"/> in
/// <paramref name="doc"/>, or the stored value when no fragment is produced.
/// </summary>
private static string HighlightOrStoredValue(Highlighter highlighter, Analyzer analyzer, Document doc, string fieldName)
{
    string stored = doc.Get(fieldName);
    if (stored == null)
    {
        // Nothing stored for this field: there is no text to highlight, so skip the
        // highlighter instead of handing it a null string.
        return null;
    }

    string fragment = highlighter.GetBestFragment(analyzer, fieldName, stored);
    return string.IsNullOrWhiteSpace(fragment) ? stored : fragment;
}
/// <summary>
/// Builds the index over the test context and checks that it contains exactly
/// 2000 documents.
/// </summary>
public void AContextProviderCanIndexADatabase()
{
    InitializeContext();

    var indexerOptions = new LuceneIndexerOptions { Path = "lucene" };
    var cache = new MemoryCache(new MemoryCacheOptions());
    var indexDirectory = FSDirectory.Open(indexerOptions.Path);
    var jiebaAnalyzer = new JieBaAnalyzer(TokenizerMode.Search);
    var engine = new SearchEngine<TestDbContext>(_context, indexDirectory, jiebaAnalyzer, cache);

    engine.CreateIndex();

    Assert.Equal(2000, engine.IndexCount);

    engine.DeleteIndex();
}
/// <summary>
/// Searches the index under "&lt;ContentRoot&gt;\Lucene\Index" for
/// <paramref name="keyword"/> across the given fields and returns up to 1000 hits
/// with highlighted values.
/// </summary>
/// <param name="field">Names of the index fields to search.</param>
/// <param name="keyword">
/// Query text, parsed with a <c>MultiFieldQueryParser</c>. Null, empty or whitespace-only
/// input yields an empty result without touching the index.
/// </param>
/// <returns>
/// An anonymous object with <c>Items</c> (list of <c>Book</c>) and <c>TotalCount</c>.
/// </returns>
public object ShowFields(string[] field, string keyword)
{
    if (string.IsNullOrWhiteSpace(keyword))
    {
        return new { Items = new List<Book>(), TotalCount = 0 };
    }

    // Directory, reader and analyzer are all IDisposable; the original only released
    // the reader. GetResultData materializes the hits before these are disposed.
    using (FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_hostingEnvironment.ContentRootPath + "\\Lucene\\Index")))
    using (IndexReader reader = DirectoryReader.Open(directory))
    using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Default))
    {
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new MultiFieldQueryParser(LuceneVersion.LUCENE_48, field, analyzer);
        Query query = parser.Parse(keyword);
        TopDocs docs = searcher.Search(query, null, 1000);
        return GetResultData(searcher, docs, query);
    }
}
/// <summary>
/// Builds the index over the test context and checks that a scored search for "John"
/// in FirstName reports five total hits.
/// </summary>
public void AContextCanBeSearchedUsingAContextProvider()
{
    InitializeContext();

    var indexerOptions = new LuceneIndexerOptions { Path = "lucene" };
    var cache = new MemoryCache(new MemoryCacheOptions());
    var indexDirectory = FSDirectory.Open(indexerOptions.Path);
    var jiebaAnalyzer = new JieBaAnalyzer(TokenizerMode.Search);
    var engine = new SearchEngine<TestDbContext>(_context, indexDirectory, jiebaAnalyzer, cache);
    engine.CreateIndex();

    var query = new SearchOptions("John", "FirstName");
    var hits = engine.ScoredSearch<User>(query);

    Assert.Equal(5, hits.TotalHits);

    engine.DeleteIndex();
}
/// <summary>
/// Runs a combined search over the index under "&lt;ContentRoot&gt;\Lucene\Index":
/// every input with a non-blank keyword is parsed against its own field and added to a
/// <c>BooleanQuery</c> with the input's <c>Occur</c>. Returns up to 1000 hits with
/// highlighted values.
/// </summary>
/// <param name="input">Per-field criteria; entries with a blank keyword are skipped.</param>
/// <returns>
/// An anonymous object with <c>Items</c> (list of <c>Book</c>) and <c>TotalCount</c>.
/// </returns>
public object ShowAdvanced(IEnumerable<MultiFieldInput> input)
{
    // Directory, reader and analyzer are all IDisposable; the original only released
    // the reader. GetResultData materializes the hits before these are disposed.
    using (FSDirectory directory = FSDirectory.Open(new DirectoryInfo(_hostingEnvironment.ContentRootPath + "\\Lucene\\Index")))
    using (IndexReader reader = DirectoryReader.Open(directory))
    using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Default))
    {
        IndexSearcher searcher = new IndexSearcher(reader);
        BooleanQuery bq = new BooleanQuery();

        foreach (var item in input)
        {
            if (string.IsNullOrWhiteSpace(item.Keyword))
            {
                continue;
            }

            QueryParser parser = new QueryParser(LuceneVersion.LUCENE_48, item.Field, analyzer);
            Query query = parser.Parse(item.Keyword);
            bq.Add(query, item.Occur);
        }

        TopDocs docs = searcher.Search(bq, null, 1000);
        return GetResultData(searcher, docs, bq);
    }
}
/// <summary>
/// Console demo in three steps:
/// 1. adds one sample document to the Lucene index under "&lt;BaseDirectory&gt;Lucene";
/// 2. reads query lines from the console, runs a fuzzy search on the "content" field
///    for each and prints the hit count plus up to three highlighted fragments per hit;
/// 3. prints the output of several JiebaSegmenter cutting modes.
/// </summary>
/// <remarks>
/// An empty line (or end of input) ends the search loop. The original loop never
/// terminated, which made step 3 unreachable.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    using (var directory = FSDirectory.Open(new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory + "Lucene")))
    {
        // Index the sample document. Disposing the writer releases its hold on the
        // directory before the reader is opened.
        using (var indexAnalyzer = new JieBaAnalyzer(TokenizerMode.Default))
        using (var indexWriter = new IndexWriter(directory, new IndexWriterConfig(LuceneVersion.LUCENE_48, indexAnalyzer)))
        {
            var fieldList = new List<Field>
            {
                new TextField("id", "22", Field.Store.YES),
                new TextField("soc", "呵呵", Field.Store.YES),
                new TextField("shot", "内容分类标准以及为读者提供的任何信息", Field.Store.YES),
                new TextField("content", "《人民日报》(电子版)的一切内容(包括但不限于文字、图片、PDF、图表、标志、标识、商标、版面设计、专栏目录与名称、内容分类标准以及为读者提供的任何信息)仅供人民网读者阅读、学习研究使用,未经人民网股份有限公司及/或相关权利人书面授权,任何单位及个人不得将《人民日报》(电子版)所登载、发布的内容用于商业性目的,包括但不限于转载、复制、发行、制作光盘、数据库、触摸展示等行为方式,或将之在非本站所属的服务器上作镜像。否则,人民网股份有限公司将采取包括但不限于网上公示、向有关部门举报、诉讼等一切合法手段,追究侵权者的法律责任。《人民日报》(电子版)的一切内容(包括但不限于文字、图片、PDF、图表、标志、标识、商标、版面设计、专栏目录与名称、内容分类标准以及为读者提供的任何信息)仅供人民网读者阅读、学习研究使用,未经人民网股份有限公司及/或相关权利人书面授权,任何单位及个人不得将《人民日报》(电子版)所登载、发布的内容用于商业性目的,包括但不限于转载、复制、发行、制作光盘、数据库、触摸展示等行为方式,或将之在非本站所属的服务器上作镜像。否则,人民网股份有限公司将采取包括但不限于网上公示、向有关部门举报、诉讼等一切合法手段,追究侵权者的法律责任。", Field.Store.YES)
            };

            indexWriter.AddDocument(fieldList);
            indexWriter.Commit();
        }

        // The index does not change while searching, so one reader, one searcher and
        // one analyzer serve every query.
        using (var directoryReader = DirectoryReader.Open(directory))
        using (var analyzer = new JieBaAnalyzer(TokenizerMode.Search))
        {
            var indexSearcher = new IndexSearcher(directoryReader);

            while (true)
            {
                var queryK = Console.ReadLine();
                if (string.IsNullOrEmpty(queryK))
                {
                    // Empty line or end of input: leave the loop and go on to the
                    // segmenter demo.
                    break;
                }

                var fuzzy = new FuzzyQuery(new Term("content", queryK));

                // Fetch the top 100 hits.
                TopDocs topDocs = indexSearcher.Search(fuzzy, 100);
                Console.WriteLine("找到: " + topDocs.TotalHits);

                // Custom highlight markup. The original tag misspelled the CSS property
                // as "backgroud", which is not a valid CSS property.
                QueryScorer scorer = new QueryScorer(fuzzy, "content");
                SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span style=\"background:red\">", "</span>");
                Highlighter highlighter = new Highlighter(htmlFormatter, scorer);

                foreach (var hit in topDocs.ScoreDocs)
                {
                    var hitDocument = indexSearcher.Doc(hit.Doc);
                    var fragments = highlighter.GetBestFragments(analyzer, "content", hitDocument.Get("content"), 3);
                    Console.WriteLine(string.Join("", fragments));
                }
            }
        }
    }

    // JiebaSegmenter demo.
    var segmenter = new JiebaSegmenter();

    var segments = segmenter.Cut("我来到北京清华大学", cutAll: true);
    Console.WriteLine("【全模式】:{0}", string.Join("/ ", segments));

    // Cut without cutAll.
    segments = segmenter.Cut("我来到北京清华大学");
    Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));

    segments = segmenter.Cut("他来到了网易杭研大厦");
    Console.WriteLine("【新词识别】:{0}", string.Join("/ ", segments));

    // Search-engine mode.
    segments = segmenter.CutForSearch("小明硕士毕业于中国科学院计算所,后在日本京都大学深造");
    Console.WriteLine("【搜索引擎模式】:{0}", string.Join("/ ", segments));

    segments = segmenter.Cut("结过婚的和尚未结过婚的");
    Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));

    segments = segmenter.Cut("北京大学生喝进口红酒");
    Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));

    segments = segmenter.Cut("在北京大学生活区喝进口红酒");
    Console.WriteLine("【歧义消除】:{0}", string.Join("/ ", segments));

    segments = segmenter.Cut("腾讯视频致力于打造中国最大的在线视频媒体平台,以丰富的内容、极致的观看体验");
    Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));

    // Remove and re-add a dictionary word before cutting an address.
    segmenter.DeleteWord("湖南");
    segmenter.AddWord("湖南");
    segments = segmenter.Cut("湖南长沙市天心区");
    Console.WriteLine("【精确模式】:{0}", string.Join("/ ", segments));

    Console.Read();
}