Example #1
        /// <summary>
        /// Tokenize the keyword and return the resulting terms
        /// </summary>
        public static IEnumerable <string> SplitWords(string keyword)
        {
            IList <string> list     = new List <string>();
            Analyzer       analyzer = new PanGuAnalyzer();
            TokenStream    stream   = analyzer.TokenStream(keyword, new StringReader(keyword));
            ITermAttribute ita      = null;
            bool           hasNext  = stream.IncrementToken();

            while (hasNext)
            {
                ita = stream.GetAttribute <ITermAttribute>();
                list.Add(ita.Term);
                hasNext = stream.IncrementToken();
            }
            return(list);

            // Legacy (Lucene.Net 2.x) equivalent using the Token API:
            //IList<string> list = new List<string>();
            //Analyzer analyzer = new PanGuAnalyzer();
            //TokenStream tokenStream = analyzer.TokenStream("", new StringReader(keyword));
            //Token token = null;
            //while ((token = tokenStream.Next()) != null)
            //{
            //    // token.TermText() is the current term
            //    string word = token.TermText();
            //    list.Add(word);
            //}

            //return list;
        }
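
A minimal usage sketch (hypothetical input; the actual tokens depend on the deployed PanGu dictionary files):

        // Prints each token produced by SplitWords, one per line.
        foreach (string term in SplitWords("盘古分词测试"))
        {
            Console.WriteLine(term);
        }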
Example #2
        static ScoreDoc[] SearchTime(IndexSearcher searcher, string queryString, string field, int numHit, bool inOrder)
        {
            //TopScoreDocCollector collector = TopScoreDocCollector.create(numHit, inOrder);
            Analyzer analyser = new PanGuAnalyzer();

            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, field, analyser);
            var         querys = queryString.Split('&');

            if (querys.Length > 1) // String.Split never returns null; use a BooleanQuery for multiple terms
            {
                BooleanQuery query = new BooleanQuery();
                foreach (var str in querys)
                {
                    query.Add(parser.Parse(str), BooleanClause.Occur.MUST);
                }
                TopFieldDocs topField = searcher.Search(query, null, numHit, new Sort(new SortField("Time", SortField.STRING_VAL, true)));
                return(topField.scoreDocs);
            }
            else
            {
                Query        query    = parser.Parse(queryString);
                TopFieldDocs topField = searcher.Search(query, null, numHit, new Sort(new SortField("Time", SortField.STRING_VAL, true)));
                //searcher.Search(query, collector);

                return(topField.scoreDocs);
            }
        }
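
A hypothetical call, assuming an open IndexSearcher over an index with an analyzed "Title" field and a stored "Time" field as above; '&'-separated sub-queries are AND-ed together as MUST clauses:

        // Returns up to numHit hits, sorted descending by the "Time" field.
        ScoreDoc[] hits = SearchTime(searcher, "手机 & 充电器", "Title", 20, true);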
Example #3
        /// <summary>
        /// Query product data from the index
        /// </summary>
        /// <param name="queryString"></param>
        /// <returns></returns>
        public List <Commodity> QueryIndex(string queryString)
        {
            IndexSearcher searcher = null;

            try
            {
                List <Commodity> ciList = new List <Commodity>();
                Directory        dir    = FSDirectory.Open(StaticConstant.IndexPath);
                searcher = new IndexSearcher(dir);
                Analyzer analyzer = new PanGuAnalyzer();

                //-------------------------------------- configure the search condition here
                QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", analyzer);
                Query       query  = parser.Parse(queryString);
                Console.WriteLine(query.ToString()); // print the parsed query expression
                TopDocs docs = searcher.Search(query, (Filter)null, 10000);

                foreach (ScoreDoc sd in docs.ScoreDocs)
                {
                    Document doc = searcher.Doc(sd.Doc);
                    ciList.Add(DocumentToCommodityInfo(doc));
                }

                return(ciList);
            }
            finally
            {
                if (searcher != null)
                {
                    searcher.Dispose();
                }
            }
        }
Example #4
        /// <summary>
        ///
        /// </summary>
        /// <param name="path">文件夹,索引存放位置</param>
        /// <param name="sources"></param>
        static void CreateIndex(string path, List <Meta> sources)
        {
            var root          = new DirectoryInfo(path);
            var fsLockFactory = new NativeFSLockFactory();

            using (FSDirectory fsRoot = FSDirectory.Open(root, fsLockFactory))
            {
                //create the writer for the index store
                //opening the directory with an IndexWriter automatically locks the index files
                //Analyzer analyzer = new SimpleAnalyzer();
                Analyzer analyzer = new PanGuAnalyzer();
                using (IndexWriter writer = new IndexWriter(fsRoot, analyzer, !IndexReader.IndexExists(fsRoot), IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    foreach (var source in sources)
                    {
                        Document document = new Document();

                        document.Add(new Field("Title", source.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                        document.Add(new Field("Url", source.Url, Field.Store.YES, Field.Index.NOT_ANALYZED));

                        document.Add(new Field("Content", source.Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                        writer.AddDocument(document);
                    }
                }
            }
        }
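
A hedged usage sketch; Meta is assumed to expose string properties Title, Url and Content, matching the field names used above:

        var sources = new List <Meta>
        {
            new Meta { Title = "盘古分词", Url = "http://example.com/1", Content = "中文分词示例" }
        };
        CreateIndex(@"D:\lucene-index", sources);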
Example #5
        public string gettxt3(string postStr)
        {
            string       word = "";
            String       text = postStr.Trim();
            Analyzer     anal = new PanGuAnalyzer(); // use the PanGu analyzer
            StringReader sb   = new StringReader(text);
            TokenStream  ts   = anal.ReusableTokenStream("", sb);
            Token        t    = null;

            // NOTE: building SQL by string concatenation like this is vulnerable to
            // SQL injection; a parameterized query would be safer.
            word = "select top 1 Content from tb_U_Message where 1=1";
            while ((t = ts.Next()) != null)
            {
                Console.WriteLine(t.TermText());
                // Response.Write(t.TermText());
                word += " and Title like '%" + t.TermText() + "%'";
            }


            string count = HXD.DBUtility.SQLHelper.ExecuteScalar(word).ToString();

            Repeater += "<MsgType><![CDATA[text]]></MsgType>";
            Repeater += "<Content><![CDATA[" + count + "\n\n如词答案任没有解决您的问题,你可以点击直接询问我们在泰国的达人。]]></Content>";



            return(Repeater);
        }
Example #6
        private static IEnumerable <string> GetSearchTerms(string searchTerm)
        {
            List <string> result   = new List <string>();
            var           analyzer = new PanGuAnalyzer();
            StringReader  sr       = new StringReader(searchTerm);
            TokenStream   stream   = analyzer.TokenStream(null, sr);
            bool          hasnext  = stream.IncrementToken();

            ITermAttribute ita;

            while (hasnext)
            {
                ita = stream.GetAttribute <ITermAttribute>();
                result.Add(ita.Term);
                hasnext = stream.IncrementToken();
            }
            sr.Close();
            analyzer.Dispose();

            var resultString = string.Join(" ", result);

            return(resultString.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                   .Concat(new[] { searchTerm })
                   .Distinct(StringComparer.OrdinalIgnoreCase)
                   .Select(Escape));
        }
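
The Escape helper is not shown in this example; a plausible implementation (an assumption, not the original code) simply delegates to QueryParser.Escape, which backslash-escapes Lucene's query-syntax characters:

        private static string Escape(string term)
        {
            // QueryParser.Escape prefixes special characters such as + - ! ( ) { } [ ] ^ " ~ * ? : \
            return Lucene.Net.QueryParsers.QueryParser.Escape(term);
        }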
Example #7
        /// <summary>
        /// Query all content matching the keyword
        /// </summary>
        /// <param name="kw">keyword</param>
        /// <param name="segment">length of the extracted fragment</param>
        /// <returns></returns>
        public static IEnumerable <PostOutputDto> Search(string kw, int segment = 200)
        {
            if (string.IsNullOrEmpty(IndexPath))
            {
                throw new Exception("未设置索引文件夹路径,参数名:" + IndexPath);
            }
            string indexPath = IndexPath;

            using (var analyzer = new PanGuAnalyzer())
            {
                var list   = CutKeywords(kw);
                var result = new ConcurrentQueue <PostOutputDto>();
                Parallel.ForEach(list, k =>
                {
                    if (k.Contains(new[] { @"\?", @"\*", @"\+", @"\-", @"\[", @"\]", @"\{", @"\}", @"\(", @"\)", "�" }))
                    {
                        return;
                    }
                    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
                    IndexReader reader    = IndexReader.Open(directory, true);
                    var searcher          = new IndexSearcher(reader);
                    QueryParser parser    = new MultiFieldQueryParser(Version.LUCENE_30, new[] { nameof(Post.Id), nameof(Post.Title), nameof(Post.Content), nameof(Post.Author), nameof(Post.Label), nameof(Post.Email), nameof(Post.Keyword) }, analyzer); // query across multiple fields
                    Query query           = parser.Parse(k);
                    int n        = 100000;
                    TopDocs docs = searcher.Search(query, null, n);
                    if (docs?.TotalHits != 0 && docs?.ScoreDocs != null)
                    {
                        foreach (ScoreDoc sd in docs.ScoreDocs) // iterate over the hits
                        {
                            Document doc = searcher.Doc(sd.Doc);
                            if (result.Any(p => p.Id == doc.Get(nameof(Post.Id)).ToInt32()))
                            {
                                continue;
                            }
                            var simpleHtmlFormatter = new SimpleHTMLFormatter("<span style='color:red;background-color:yellow;font-size: 1.1em;font-weight:700;'>", "</span>");
                            var highlighter         = new Highlighter(simpleHtmlFormatter, new Segment())
                            {
                                FragmentSize = segment
                            };
                            var content = doc.Get(nameof(Post.Content));
                            if (content.Length <= segment)
                            {
                                segment = content.Length;
                            }
                            result.Enqueue(new PostOutputDto()
                            {
                                Id      = doc.Get(nameof(Post.Id)).ToInt32(),
                                Title   = doc.Get(nameof(Post.Title)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Title))) : doc.Get(nameof(Post.Title)),
                                Content = content.ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, content) : content.Substring(0, segment),
                                Author  = doc.Get(nameof(Post.Author)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Author))) : doc.Get(nameof(Post.Author)),
                                Label   = doc.Get(nameof(Post.Label)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Label))) : doc.Get(nameof(Post.Label)),
                                Email   = doc.Get(nameof(Post.Email)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Email))) : doc.Get(nameof(Post.Email)),
                                Keyword = doc.Get(nameof(Post.Keyword)).ToLower().Contains(k.ToLower()) ? highlighter.GetBestFragment(k, doc.Get(nameof(Post.Keyword))) : doc.Get(nameof(Post.Keyword))
                            });
                        }
                    }
                });
                return(result.Where(p => !string.IsNullOrEmpty(p.Title)).DistinctBy(p => p.Id));
            }
        }
Example #8
        private static IndexWriter GetWriter(String project)
        {
            if (String.IsNullOrWhiteSpace(project))
            {
                project = "NoneName";
            }
            String path = AllConfig.m_path;

            if (m_indexWrite.ContainsKey(project))
            {
                m_indexSearch.TryRemove(project, out var a);
                return(m_indexWrite[project]);
            }
            lock (m_lock)
            {
                if (m_indexWrite.ContainsKey(project))
                {
                    m_indexSearch.TryRemove(project, out var a);
                    return(m_indexWrite[project]);
                }
                IndexWriter fsWriter  = null;
                Boolean     isExisted = File.Exists(Path.Combine(path, "write.lock"));
                FSDirectory fsDir     = FSDirectory.Open(new DirectoryInfo(path));
                Analyzer    analyser  = new PanGuAnalyzer();
                fsWriter = new IndexWriter(fsDir, analyser, !isExisted, IndexWriter.MaxFieldLength.UNLIMITED);
                m_indexWrite.TryAdd(project, fsWriter);
                return(fsWriter);
            }
        }
Example #9
        public static void PrepareIndex(bool isPangu)
        {
            Analyzer analyzer = null;

            if (isPangu)
            {
                analyzer = new PanGuAnalyzer(); // PanGu analyzer
            }
            else
            {
                analyzer = new StandardAnalyzer(Version.LUCENE_29);
            }
            DirectoryInfo dirInfo = Directory.CreateDirectory(Config.INDEX_STORE_PATH);

            LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
            IndexWriter        writer    = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            CreateIndex(writer, "jeffreyzhao", "博客园有一个老赵,人格魅力巨大,洋名就叫jeffreyzhao。据我所知,他还是一个胖子,一个钢琴业余爱好者。");
            CreateIndex(writer, "lucene测试", "这是一个测试,关于lucene.net的 关注老赵");
            CreateIndex(writer, "博客园里有牛人", "Hello World. 我认识的一个高手,他拥有广博的知识,有极客的态度,还经常到园子里来看看");
            CreateIndex(writer, "奥巴马", "美国现任总统是奥巴马?确定不是奥巴牛和奥巴羊 不知道问老赵");
            CreateIndex(writer, "奥林匹克", "奥林匹克运动会将来到南美美丽热情的国度巴西,也就是亚马逊河流域的一个地方");
            CreateIndex(writer, "写给自己", "博客园的jeffwong,新的开始,继续努力了");
            writer.Optimize();
            writer.Close();
        }
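
The CreateIndex(writer, title, content) helper is not shown here; a plausible shape (an assumption), indexing an analyzed title and body:

        static void CreateIndex(IndexWriter writer, string title, string content)
        {
            Document doc = new Document();
            doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("body", content, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }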
Example #10
        public static List <string> PanGuSplitWord(string msg)
        {
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(msg));

            ITermAttribute ita;
            List <string>  list    = new List <string>();
            // Advance the stream before reading the attribute; reading it before the
            // first IncrementToken() call would yield an empty or stale term.
            bool           hasNext = tokenStream.IncrementToken();

            while (hasNext)
            {
                ita = tokenStream.GetAttribute <ITermAttribute>();
                list.Add(ita.Term);
                hasNext = tokenStream.IncrementToken();
            }

            analyzer.Close();
            return(list);

        }
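
A usage sketch; the exact segmentation depends on the PanGu dictionary files:

        // e.g. "上海东方明珠" might yield ["上海", "东方明珠"]
        string rewritten = string.Join(" AND ", PanGuSplitWord("上海东方明珠"));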
Example #11
        public List <Student> QueryIndex(string queryString)
        {
            IndexSearcher  searcher = null;
            List <Student> ciList   = null;

            try
            {
                ciList = new List <Student>();
                Directory dir = FSDirectory.Open(IndexHelper.Path);
                searcher = new IndexSearcher(dir);
                Analyzer analyzer = new PanGuAnalyzer();
                // configure the search condition here
                QueryParser parser = new QueryParser(Version.LUCENE_30, "title", analyzer);
                Query       query  = parser.Parse(queryString);
                TopDocs     docs   = searcher.Search(query, (Filter)null, 10000);
                foreach (ScoreDoc sd in docs.ScoreDocs)
                {
                    Document doc = searcher.Doc(sd.Doc);
                    ciList.Add(DocumentToCommodityInfo(doc));
                }
                return(ciList);
            }
            catch (Exception e)
            {
                logger.Error("Error:", e);
            }
            finally
            {
                if (searcher != null)
                {
                    searcher.Close();
                }
            }
            return(ciList);
        }
Example #12
        /// <summary>
        /// Build all indexes
        /// </summary>
        /// <param name="callback"></param>
        public void MakeLuceneIndex(Action <Document> callback)
        {
            #region China universities

            ChinaUniversityDao     chinaUniverisityDao = new ChinaUniversityDao();
            List <ChinaUniversity> universityList      = chinaUniverisityDao.GetChinaUniversityList();

            PanGuAnalyzer analyzer = new PanGuAnalyzer(true);

            string textIndexDir = LuceneManager.GetLuceneTextIndexDirectoryPath(LuceneTextIndexType.ChinaUniversity, null);

            List <Document> documentList = new List <Document>();
            foreach (ChinaUniversity university in universityList)
            {
                Document indexDoc = new Document();

                #region Add the columns to be indexed as needed

                indexDoc.Add(new NumericField("UniversityId", Field.Store.YES, true).SetIntValue(university.UniversityId));
                indexDoc.Add(new Field("CnName", university.Name.ToLower(), Field.Store.YES, Field.Index.ANALYZED));
                indexDoc.Add(new Field("Pinyin", university.Pinyin.ToLower(), Field.Store.YES, Field.Index.ANALYZED));
                #endregion Add the columns to be indexed as needed

                documentList.Add(indexDoc);
            }
            LuceneManager.MakeIndex(documentList, textIndexDir, callback);

            #endregion China universities
        }
Example #13
        private List <BookSearchModel> SearchBookContent(string searchWords)
        {
            List <BookSearchModel> bookSearchModelList = new List <BookSearchModel>();
            // 1. Tokenize the search condition
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(searchWords));

            Lucene.Net.Analysis.Token token = null;
            string indexPath = @"D:\lucenedir";
            //string kw = "面向对象"; // split the user's search condition into terms
            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            // search condition
            PhraseQuery query = new PhraseQuery();

            //foreach (string word in kw.Split(' ')) // let the user pre-tokenize with spaces, e.g. "计算机   专业"
            //{
            //    query.Add(new Term("body", word));
            //}
            //query.Add(new Term("body","语言")); // extra conditions can be added; they are AND-ed and order does not matter
            // query.Add(new Term("body", "大学生"))
            while ((token = tokenStream.Next()) != null)
            {
                query.Add(new Term("body", token.TermText()));
            }
            // query.Add(new Term("body", kw));//body中含有kw的文章
            query.SetSlop(100);//多个查询条件的词之间的最大距离.在文章中相隔太远 也就无意义.(例如 “大学生”这个查询条件和"简历"这个查询条件之间如果间隔的词太多也就没有意义了。)
            //TopScoreDocCollector是盛放查询结果的容器
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query; hits go into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // all hits; GetTotalHits() is the total count, and TopDocs(start, count) supports paging, e.g. TopDocs(300, 20) returns documents 300..320
            for (int i = 0; i < docs.Length; i++)
            {
                // ScoreDoc[] carries only document ids, so the hit Documents are not all loaded
                // into memory at once; fetch the full Document on demand via searcher.Doc(id).
                int             docId       = docs[i].doc;         // Lucene's internal document id
                Document        doc         = searcher.Doc(docId); // full document for that id
                BookSearchModel searchModel = new BookSearchModel();
                searchModel.Id                = int.Parse(doc.Get("ID"));
                searchModel.Title             = doc.Get("title");
                searchModel.ContenDescription = SearchWordHighlight.CreateHightLight(searchWords, doc.Get("body"));
                //this.listBox1.Items.Add(doc.Get("number") + "\n");// 取出放进字段的值
                //this.listBox1.Items.Add(doc.Get("body") + "\n");
                //this.listBox1.Items.Add("-----------------------\n");
                bookSearchModelList.Add(searchModel);
            }
            // save the search keywords to the search-details store
            SearchDetails entity = new SearchDetails()
            {
                Id = Guid.NewGuid(), KeyWords = searchWords, SearchDateTime = DateTime.Now
            };

            SearchDetailsService.AddEntity(entity);
            return(bookSearchModelList);
        }
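
SearchWordHighlight.CreateHightLight is not shown in this example; a sketch built on PanGu.HighLight, mirroring the SimpleHTMLFormatter/Highlighter usage from Example #7 (the class shape and fallback behavior are assumptions):

        public static class SearchWordHighlight
        {
            public static string CreateHightLight(string keyword, string content)
            {
                var formatter   = new SimpleHTMLFormatter("<span class=\"hl\">", "</span>");
                var highlighter = new Highlighter(formatter, new Segment()) { FragmentSize = 200 };
                // GetBestFragment returns an empty result when the keyword does not occur.
                string fragment = highlighter.GetBestFragment(keyword, content);
                return string.IsNullOrEmpty(fragment) ? content : fragment;
            }
        }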
Example #14
        /// <summary>
        /// Run the search
        /// </summary>
        /// <returns></returns>
        public ActionResult Search()
        {
            string kw        = Request["kw"];                 // 获取用户输入的搜索内容
            string indexPath = Server.MapPath("~/lucenedir"); // 从哪里搜索

            // tokenize the user's input
            List <string> kws         = new List <string>(); // holds the resulting terms
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(kw.ToString()));

            Lucene.Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {
                kws.Add(token.TermText());
            }

            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            // search condition

            // note: this class only supports single-field search; use a different class for multi-field search
            PhraseQuery query = new PhraseQuery();

            foreach (var word in kws)
            {
                query.Add(new Term("content", word)); // 向content这个列进行搜索
            }

            query.SetSlop(100); // maximum allowed distance between the query terms; terms too far apart in a document are meaningless as a phrase
            // TopScoreDocCollector is the container that collects the hits
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query; hits go into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // all hits; GetTotalHits() is the total count, and TopDocs(start, count) supports paging

            // list to store the search results
            List <BookVieModel> bookList = new List <BookVieModel>();

            for (int i = 0; i < docs.Length; i++)
            {
                // ScoreDoc[] carries only document ids; fetch the full Document via searcher.Doc(id)
                int      docId = docs[i].doc;         // Lucene's internal document id
                Document doc   = searcher.Doc(docId); // full document for that id

                BookVieModel model = new BookVieModel();
                model.Id    = Convert.ToInt32(doc.Get("Id"));             // note: field names must match those used when building the index
                model.Title = CreateHightLight(kw, doc.Get("title"));
                // highlight the search terms in the result
                model.Content = CreateHightLight(kw, doc.Get("content"));

                bookList.Add(model);
            }
            ViewBag.books = bookList;
            ViewBag.kw    = kw;
            return(View("Index"));
        }
Example #15
        public LoggerMqConsume()
        {
            RouteKey  = "Logger_Route_Key";
            QueueName = "Logger_Queue";
            FSDirectory fsDir    = FSDirectory.Open(new DirectoryInfo(INDEX_STORE_PATH));
            Analyzer    analyser = new PanGuAnalyzer();

            fsWriter = new IndexWriter(fsDir, analyser, true, IndexWriter.MaxFieldLength.UNLIMITED);
        }
Example #16
        private void button3_Click(object sender, EventArgs e)
        {
            string indexPath = @"C:\Users\杨ShineLon\Desktop\lucenedir"; // 从哪里搜索
            string kw        = textBox1.Text;                           //"面向对象";//对用户输入的搜索条件进行拆分。

            // tokenize the user's input
            List <string> kws         = new List <string>(); // holds the resulting terms
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(kw.ToString()));

            Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {
                kws.Add(token.TermText());
                //Console.WriteLine(token.TermText());
            }

            FSDirectory   directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader   reader    = IndexReader.Open(directory, true);
            IndexSearcher searcher  = new IndexSearcher(reader);
            // search condition
            PhraseQuery query = new PhraseQuery();

            //foreach (string word in kw.Split(' ')) // let the user pre-tokenize with spaces, e.g. "计算机   专业"
            //{
            //    query.Add(new Term("body", word));
            //}
            //query.Add(new Term("body","语言")); // extra conditions can be added; they are AND-ed and order does not matter
            // query.Add(new Term("body", "大学生"))

            //query.Add(new Term("body", kw)); // documents whose body contains kw
            foreach (var word in kws)
            {
                query.Add(new Term("body", word));
            }

            query.SetSlop(100); // maximum allowed distance between the query terms; terms too far apart in a document are meaningless as a phrase
            // TopScoreDocCollector is the container that collects the hits
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            searcher.Search(query, null, collector);                                    // run the query; hits go into the collector
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs; // all hits; GetTotalHits() is the total count, and TopDocs(start, count) supports paging
            // (useful for implementing paging)
            this.listBox1.Items.Clear();
            for (int i = 0; i < docs.Length; i++)
            {
                // ScoreDoc[] carries only document ids; fetch the full Document via searcher.Doc(id)
                int      docId = docs[i].doc;                      // Lucene's internal document id
                Document doc   = searcher.Doc(docId);              // full document for that id
                this.listBox1.Items.Add(doc.Get("number") + "\n"); // read back a stored field
                this.listBox1.Items.Add(doc.Get("body") + "\n");
                this.listBox1.Items.Add("-----------------------\n");
            }
        }
Example #17
        /// <summary>
        /// Create the index
        /// </summary>
        /// <param name="Model"></param>
        private void createIndex(News Model)
        {
            // index directory path
            string path = Server.MapPath("/Indexs");

            // the analyzer (tokenizer)
            Analyzer analyzer = new PanGuAnalyzer();

            // Directory used by the index.
            // The lock prevents one Lucene index from being written by several IndexWriters at once;
            // concurrent writers could corrupt the index.
            // While an index is locked, its folder always contains a write.lock file (the converse does not hold).
            // IndexWriter uses NativeFSLockFactory by default;
            // its main benefit is that if the process exits abnormally, the OS releases the lock instead of
            // leaving the index locked, even though write.lock remains on disk.
            // On a normal exit write.lock is not deleted either; Lucene merely releases its handle on the file.
            Lucene.Net.Store.Directory d = FSDirectory.Open(new DirectoryInfo(path), new NativeFSLockFactory());

            // returns true if the directory already contains an index
            bool isUpdate = IndexReader.IndexExists(d);

            if (isUpdate)
            {
                // if the index in this directory is locked, unlock it first
                if (IndexWriter.IsLocked(d))
                {
                    IndexWriter.Unlock(d);
                }
            }

            // third argument: true = create (or overwrite) the index, false = append to it
            using (IndexWriter iw = new IndexWriter(d, analyzer, !isUpdate, IndexWriter.MaxFieldLength.LIMITED))
            {
                Random random = new Random();
                for (int i = 100; i < 110; i++)
                {
                    DateTime dt  = DateTime.Now.AddHours(random.Next(100, 1000));
                    Document doc = new Document();
                    // store as a numeric field
                    doc.Add(new NumericField("Id", Field.Store.YES, true).SetIntValue(Model.Id));

                    doc.Add(new Field("Title", Model.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));

                    doc.Add(new Field("Content", Model.Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));

                    doc.Add(new Field("AddTime", dt.ToString("yyyy-MM-dd HH:mm:ss"), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));

                    doc.Add(new NumericField("OrderId", Field.Store.YES, true).SetLongValue(Convert.ToInt64(DateTools.DateToString(dt, DateTools.Resolution.SECOND))));

                    iw.AddDocument(doc);
                }
                iw.Optimize();
            }
        }
Example #18
        private void button3_Click(object sender, EventArgs e)
        {
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader("面向世界,面向现代化"));

            Lucene.Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {
                Console.WriteLine(token.TermText());
            }
        }
Example #19
        private void button6_Click(object sender, EventArgs e)
        {
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(textBox2.Text));
            Token       token       = null;

            while ((token = tokenStream.Next()) != null)
            {
                MessageBox.Show(token.TermText());
            }
        }
Example #20
        public static void Optimize()
        {
            var analyzer = new PanGuAnalyzer();  //new StandardAnalyzer(Version.LUCENE_30);

            using (var writer = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // the using block disposes the writer; an explicit Dispose() here would dispose it twice
                writer.Optimize();
            }
            analyzer.Close();
        }
Example #21
        //
        // GET: /Test/
        public ActionResult Index()
        {
            PanGuAnalyzer analyzer = new PanGuAnalyzer();
            TokenStream   ts       = analyzer.ReusableTokenStream("", new StringReader("供应子母门 非标门 钢质门 进户门 金属门"));
            Token         token;

            while ((token = ts.Next()) != null)
            {
                Response.Write(token.TermText() + ";");
            }
            return(View());
        }
        public static string Delete(string id)
        {
            #region Validate input
            if (string.IsNullOrEmpty(id))
            {
                throw new Exception("参数id不能为空");
            }
            #endregion



            //Analyzer analyzer = new PanGuAnalyzer();
            //FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
            //bool isUpdate = IndexReader.IndexExists(directory);
            //if (isUpdate)
            //{
            //    // if the index directory is locked (e.g. the indexing process crashed), unlock it first
            //    if (IndexWriter.IsLocked(directory))
            //    {
            //        IndexWriter.Unlock(directory);
            //    }
            //}
            //IndexWriter writer = new IndexWriter(directory, analyzer, !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
            ////to avoid duplicate index entries, delete the record with this id before re-adding it
            //writer.DeleteDocuments(new Term("id", id));
            ////optimize the index files
            //writer.Optimize();
            //analyzer.Close();
            //writer.Dispose();
            //directory.Dispose();


            Analyzer    analyzer  = new PanGuAnalyzer();
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());

            /** Create the index writer,
             * used to actually write and delete index and document data
             * */
            IndexWriter indexWriter = new IndexWriter(directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);

            /** Delete the documents matching the term query;
             * deleting again after they are already gone is a no-op */
            Term      term  = new Term("id", id);
            TermQuery query = new TermQuery(term);
            indexWriter.DeleteDocuments(query);
            /** Changes take effect even without Commit, but an explicit commit is recommended */
            indexWriter.Commit();
            /** Closing the writer flushes automatically */
            indexWriter.Optimize();
            indexWriter.Dispose();

            return("");
        }
        public static string DeleteAll()
        {
            ///** Create an IKAnalyzer Chinese analyzer
            // * IKAnalyzer(): defaults to finest-grained segmentation
            // * IKAnalyzer(boolean useSmart): true = smart segmentation; false = finest-grained segmentation
            // * */
            //Analyzer analyzer = new IKAnalyzer();
            ///** Point at the index/document directory;
            // * if it is not a Lucene index directory, nothing is done */
            //Directory directory = FSDirectory.open(indexDir);

            ///** Create the index writer config, passing in the analyzer
            // * Lucene 7.4.0: the IndexWriterConfig constructor does not take Version.LUCENE_4_10_3
            // * Lucene 4.10.3: the IndexWriterConfig constructor requires Version.LUCENE_4_10_3
            // * */
            //IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);

            ///** Create the index writer,
            // * used to actually write and delete index and document data
            // * */
            //IndexWriter indexWriter = new IndexWriter(directory, config);

            ///** Delete all indexes;
            // * deleting again after they are already gone is a no-op */
            //indexWriter.deleteAll();

            ///** Changes take effect even without commit, but an explicit commit is recommended */
            //indexWriter.commit();
            ///** close() closes the stream and flushes automatically */
            //indexWriter.close();



            Analyzer    analyzer  = new PanGuAnalyzer();
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());

            /** Create the index writer,
             * used to actually write and delete index and document data
             * */
            IndexWriter indexWriter = new IndexWriter(directory, analyzer, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);

            /** Delete all indexes;
             * deleting again after they are already gone is a no-op */
            indexWriter.DeleteAll();

            /** Changes take effect even without Commit, but an explicit commit is recommended */
            indexWriter.Commit();
            /** Dispose closes the writer and flushes automatically */
            indexWriter.Dispose();

            return("");
        }
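
Hypothetical calls, assuming indexPath points at an existing index directory:

        Delete("42");  // removes the document whose "id" field equals "42"
        DeleteAll();   // wipes the whole index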
Example #24
        /// <summary>
        /// PanGu word segmentation
        /// </summary>
        /// <param name="msg">string to segment</param>
        /// <returns>the resulting terms</returns>
        public static List <string> PanguSplitWords(string msg)
        {
            List <string> list        = new List <string>();
            Analyzer      analyzer    = new PanGuAnalyzer();
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(msg));

            Lucene.Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {
                list.Add(token.TermText());
            }
            return(list);
        }
Example #25
        /// <summary>
        /// Tokenize the user's search condition
        /// </summary>
        /// <param name="str"></param>
        /// <returns></returns>
        private static string[] SplitWord(string str)
        {
            List <string> list        = new List <string>();
            Analyzer      analyzer    = new PanGuAnalyzer();                             // use the PanGu analyzer
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(str));

            Lucene.Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {
                list.Add(token.TermText());
            }
            return(list.ToArray());
        }
Example #26
        /// <summary>
        /// Query product data from the index, paged
        /// </summary>
        /// <param name="queryString"></param>
        /// <param name="fieldName"></param>
        /// <param name="pageIndex">第一页为1</param>
        /// <param name="pageSize"></param>
        /// <param name="totalCount"></param>
        /// <param name="filter"></param>
        /// <param name="sort"></param>
        /// <param name="fieldModelList"></param>
        /// <returns></returns>
        public List <T> QueryIndexPage(string queryString, string fieldName, int pageIndex, int pageSize, out int totalCount, Filter filter, Sort sort, IEnumerable <FieldDataModel> fieldModelList)
        {
            List <T> modelList = null;

            using (Analyzer analyzer = new PanGuAnalyzer())
            {
                //-------------------------------------- configure the search condition here
                QueryParser parser = new QueryParser(LuceneUtil.Version.LUCENE_30, fieldName, analyzer);
                Query       query  = parser.Parse(queryString);
                modelList = QueryIndexPage(query, pageIndex, pageSize, out totalCount, filter, sort, fieldModelList);
            }
            return(modelList);
        }
Example #27
        //private CustomLogger m_logger = new CustomLogger(typeof(LuceneQuery<T>));

        #endregion Identity

        #region QueryIndex

        /// <summary>
        /// Query product data from the index
        /// </summary>
        /// <param name="queryString"></param>
        /// <param name="fieldName"></param>
        /// <param name="fieldModelList"></param>
        /// <param name="listCount"></param>
        /// <returns></returns>
        public List <T> QueryIndex(string queryString, string fieldName, IEnumerable <FieldDataModel> fieldModelList, int listCount = 1000)
        {
            List <T> modelList = null;

            using (Analyzer analyzer = new PanGuAnalyzer())
            {
                //-------------------------------------- configure the search condition here
                QueryParser parser = new QueryParser(LuceneUtil.Version.LUCENE_30, fieldName, analyzer);
                Query       query  = parser.Parse(queryString);
                modelList = QueryIndex(query, fieldModelList, listCount);
            }
            return(modelList);
        }
Example #28
        private string[] SplitWords(string content)
        {
            List <string> strList     = new List <string>();
            Analyzer      analyzer    = new PanGuAnalyzer(); // use the PanGu (盘古) segmentation algorithm
            TokenStream   tokenStream = analyzer.TokenStream("", new StringReader(content));

            Lucene.Net.Analysis.Token token = null;
            while ((token = tokenStream.Next()) != null)
            {                                  // Next() keeps producing terms until it returns null
                strList.Add(token.TermText()); // collect the segmented term
            }
            return(strList.ToArray());
        }
Example #29
        public static string[] GetKeyWords(string str)
        {
            var         list        = new List <string>();
            Analyzer    analyzer    = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
            Token       token;

            while ((token = tokenStream.Next()) != null)
            {
                list.Add(token.TermText());
            }
            return(list.ToArray());
        }
Example #30
        static ScoreDoc[] Search(IndexSearcher searcher, string queryString, string field, int numHit, bool inOrder)
        {
            TopScoreDocCollector collector = TopScoreDocCollector.create(numHit, inOrder);
            Analyzer             analyser  = new PanGuAnalyzer();

            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, field, analyser);

            Query query = parser.Parse(queryString);

            searcher.Search(query, collector);

            return(collector.TopDocs().scoreDocs);
        }
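
A hypothetical call, assuming an open IndexSearcher and an analyzed field named "body":

        // Top 10 hits, collected in score order.
        ScoreDoc[] hits = Search(searcher, "中文分词", "body", 10, true);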