/// <summary>
/// Splits <paramref name="content"/> into words using the PanGu analyzer.
/// </summary>
/// <param name="content">Text to tokenize. <c>null</c> or empty text yields an empty array.</param>
/// <returns>The term text of every token, in the order the token stream produced them.</returns>
public static string[] SplitWords(string content)
{
    List<string> strList = new List<string>();
    if (string.IsNullOrEmpty(content))
    {
        // StringReader rejects null, and there is nothing to tokenize either way.
        return strList.ToArray();
    }

    // Use the PanGu word-segmentation analyzer.
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content));
    try
    {
        Lucene.Net.Analysis.Token token;
        // Next() keeps returning tokens until the stream is exhausted (null).
        while ((token = tokenStream.Next()) != null)
        {
            strList.Add(token.TermText());
        }
    }
    finally
    {
        // Close the stream even when tokenization throws.
        tokenStream.Close();
    }

    return strList.ToArray();
}
/// <summary>
/// Runs a string through the PanGu analyzer and returns the resulting terms as a list.
/// </summary>
/// <param name="str">Text to tokenize. <c>null</c> or empty text yields an empty list.</param>
/// <returns>The term text of every token, in the order the token stream produced them.</returns>
public static List<string> ChangeStringToSegment(string str)
{
    List<string> list = new List<string>();
    if (string.IsNullOrEmpty(str))
    {
        // StringReader rejects null, and there is nothing to tokenize either way.
        return list;
    }

    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = tokenStream.Next()) != null)
        {
            list.Add(token.TermText());
        }
    }
    finally
    {
        // Close the stream even when tokenization throws.
        tokenStream.Close();
    }

    return list;
}
/// <summary>
/// Tokenizes a search/index string with the PanGu analyzer.
/// </summary>
/// <param name="str">Text to tokenize. <c>null</c> or empty text yields an empty array.</param>
/// <returns>The term text of every token, in the order the token stream produced them.</returns>
public static string[] SqlitIndexWord(string str)
{
    List<string> list = new List<string>();
    if (string.IsNullOrEmpty(str))
    {
        // StringReader rejects null, and there is nothing to tokenize either way.
        return list.ToArray();
    }

    // PanGu segmentation of the supplied search condition.
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = tokenStream.Next()) != null)
        {
            // Read the term once and reuse it for both the echo and the result.
            string termText = token.TermText();
            // NOTE(review): every term is echoed to the console; this looks like leftover
            // debug output - confirm before removing.
            Console.WriteLine(termText);
            list.Add(termText);
        }
    }
    finally
    {
        // Close the stream even when tokenization throws.
        tokenStream.Close();
    }

    return list.ToArray();
}
/// <summary>
/// Builds a Lucene index from the supplied documents, replacing any index already at the path.
/// </summary>
/// <param name="indexDocList">Documents to index. <c>null</c> or empty produces an empty index.</param>
/// <param name="directoryPath">Folder holding the index files; created when missing.</param>
/// <param name="callback">Optional callback invoked after each document has been added.</param>
/// <remarks>
/// Exceptions are not propagated: they are written to the log and the method returns.
/// </remarks>
public static void MakeIndex(List<Document> indexDocList, string directoryPath, Action<Document> callback)
{
    try
    {
        PanGuAnalyzer analyzer = new PanGuAnalyzer(true);
        try
        {
            string textIndexDir = directoryPath;
            if (!System.IO.Directory.Exists(textIndexDir))
            {
                System.IO.Directory.CreateDirectory(textIndexDir);
            }

            Lucene.Net.Store.Directory indexDirectory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(textIndexDir), new NativeFSLockFactory());

            // Clear a lock that is still present on an existing index before opening a writer on it.
            if (IndexReader.IndexExists(indexDirectory) && IndexWriter.IsLocked(indexDirectory))
            {
                IndexWriter.Unlock(indexDirectory);
            }

            // create == true: the index is rebuilt from scratch.
            IndexWriter indexWriter = new IndexWriter(indexDirectory, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED);
            try
            {
                if (indexDocList != null)
                {
                    foreach (var item in indexDocList)
                    {
                        indexWriter.AddDocument(item, analyzer);
                        if (callback != null)
                        {
                            callback(item);
                        }
                    }
                }

                indexWriter.Optimize();
            }
            finally
            {
                // Always close the writer so a failure while adding documents
                // does not leave it (and its write lock) open.
                indexWriter.Close();
            }
        }
        finally
        {
            analyzer.Close();
        }
    }
    catch (Exception ex)
    {
        LogHelper.Info(typeof(LuceneManager), ex.ToString());
    }
}
#region Tokenization test
/// <summary>
/// Tokenization test: segments a keyword with the PanGu analyzer.
/// </summary>
/// <param name="keyword">Text to tokenize. <c>null</c> or empty text yields an empty string.</param>
/// <returns>Every term followed by a <c>|</c> separator, e.g. <c>a|b|</c>.</returns>
public string Token(string keyword)
{
    if (string.IsNullOrEmpty(keyword))
    {
        // StringReader rejects null, and there is nothing to tokenize either way.
        return "";
    }

    System.Text.StringBuilder ret = new System.Text.StringBuilder();
    System.IO.StringReader reader = new System.IO.StringReader(keyword);
    // Use the PanGu word-segmentation analyzer.
    Analyzer analyzer = new PanGuAnalyzer();
    try
    {
        // NOTE(review): the keyword itself is passed as the field name - confirm this is intended.
        TokenStream ts = analyzer.TokenStream(keyword, reader);
        while (ts.IncrementToken())
        {
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            ret.Append(ita.Term);
            ret.Append("|");
        }
    }
    finally
    {
        // Release the reader and analyzer even when tokenization throws.
        reader.Close();
        analyzer.Close();
    }

    return ret.ToString();
}
/// <summary>
/// Splits <paramref name="content"/> into words using the PanGu analyzer.
/// </summary>
/// <param name="content">Text to tokenize. <c>null</c> or empty text yields an empty array.</param>
/// <returns>The terms in the order the token stream produced them.</returns>
public static string[] SplitWords(string content)
{
    List<string> strList = new List<string>();
    if (string.IsNullOrEmpty(content))
    {
        // StringReader rejects null, and there is nothing to tokenize either way.
        return strList.ToArray();
    }

    // Use the PanGu word-segmentation analyzer; dispose it and the stream when done.
    using (Analyzer analyzer = new PanGuAnalyzer())
    using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content)))
    {
        while (tokenStream.IncrementToken())
        {
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita = tokenStream.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            strList.Add(ita.Term);
        }
    }

    return strList.ToArray();
}
/// <summary>
/// Splits <paramref name="content"/> into words using the PanGu analyzer.
/// </summary>
/// <param name="content">Text to tokenize. <c>null</c> or empty text yields an empty array.</param>
/// <returns>The terms in the order the token stream produced them.</returns>
public static string[] SplitWords(string content)
{
    List<string> strList = new List<string>();
    if (string.IsNullOrEmpty(content))
    {
        // StringReader rejects null, and there is nothing to tokenize either way.
        return strList.ToArray();
    }

    // Use the PanGu word-segmentation analyzer; dispose it and the stream when done.
    using (Analyzer analyzer = new PanGuAnalyzer())
    using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content)))
    {
        // AddAttribute registers the term attribute on the stream (or returns the one
        // already registered), so it can be read directly after every IncrementToken()
        // without re-checking HasAttribute/GetAttribute per token.
        ITermAttribute term = tokenStream.AddAttribute<ITermAttribute>();
        while (tokenStream.IncrementToken())
        {
            strList.Add(term.Term);
        }
    }

    return strList.ToArray();
}
/// <summary>
/// Segments <paramref name="content"/> with the PanGu analyzer and joins the terms into one string.
/// </summary>
/// <param name="content">Text to tokenize. <c>null</c> or empty text yields an empty string.</param>
/// <returns>Every term followed by a single space, e.g. <c>"a b "</c>.</returns>
public static string QueryParserWord(string content)
{
    StringBuilder sb = new StringBuilder();
    if (string.IsNullOrEmpty(content))
    {
        // StringReader rejects null, and there is nothing to tokenize either way.
        return sb.ToString();
    }

    // Use the PanGu word-segmentation analyzer; dispose it and the stream when done.
    using (Analyzer analyzer = new PanGuAnalyzer())
    using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content)))
    {
        while (tokenStream.IncrementToken())
        {
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita = tokenStream.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            sb.Append(ita.Term);
            sb.Append(" ");
        }
    }

    return sb.ToString();
}
/// <summary>
/// Searches the outdoor-media index with the filters carried by <paramref name="model"/>.
/// </summary>
/// <param name="model">Search criteria: media/city codes, schedule date, category keywords, price band and sort type.</param>
/// <param name="totalHits">Total number of matching documents reported by the searcher (0 when the index folder does not exist).</param>
/// <returns>At most 30 items, sorted by the sort field derived from <c>model.generateType</c>.</returns>
public List<LinkItem> Search(GenerateSchemeViewModel model, out int totalHits)
{
    if (!Directory.Exists(LuceneCommon.IndexOutDoorDirectory))
    {
        totalHits = 0;
        return new List<LinkItem>();
    }

    var combineQuery = new BooleanQuery();
    SortField sortField = GetSortField(model.generateType);

    // Member status: from CompanyAuth up to 99.
    var memberStatusQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.MemberStatus,
        (int)MemberStatus.CompanyAuth, 99, true, true);
    combineQuery.Add(memberStatusQuery, Occur.MUST);

    // Review status: from ShowOnline up to 99.
    var verifyStatus = NumericRangeQuery.NewIntRange(OutDoorIndexFields.Status,
        (int)OutDoorStatus.ShowOnline, 99, true, true);
    combineQuery.Add(verifyStatus, Occur.MUST);

    // Media category: comma-separated codes, any of which may match (SHOULD inside a MUST group).
    if (!string.IsNullOrEmpty(model.mediaCode))
    {
        var mediaCodes = model.mediaCode.Split(',').Select(x => Convert.ToInt32(x));
        var mediaCodeCombineQuery = new BooleanQuery();
        foreach (var code in mediaCodes)
        {
            var maxCode = Utilities.GetMaxCode(code);
            var mediaCodeQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.MediaCateCode,
                code, maxCode, true, true);
            mediaCodeCombineQuery.Add(mediaCodeQuery, Occur.SHOULD);
        }
        combineQuery.Add(mediaCodeCombineQuery, Occur.MUST);
    }

    // Media schedule: deadline between ten years ago and the requested date.
    if (!string.IsNullOrEmpty(model.dq))
    {
        var minValue = (DateTime.Now.AddYears(-10)).Ticks;
        var maxValue = Convert.ToDateTime(model.dq).Ticks;
        var DeadLineQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.DeadLine,
            Convert.ToInt64(minValue), Convert.ToInt64(maxValue), true, true);
        combineQuery.Add(DeadLineQuery, Occur.MUST);
    }

    // Region: comma-separated city codes, any of which may match.
    if (!string.IsNullOrEmpty(model.cityCode))
    {
        var cityCodes = model.cityCode.Split(',').Select(x => Convert.ToInt32(x));
        var cityCodeCombineQuery = new BooleanQuery();
        foreach (var code in cityCodes)
        {
            var maxCode = Utilities.GetMaxCode(code);
            var cityCodeQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.CityCateCode,
                code, maxCode, true, true);
            cityCodeCombineQuery.Add(cityCodeQuery, Occur.SHOULD);
        }
        combineQuery.Add(cityCodeCombineQuery, Occur.MUST);
    }

    // Keywords: built from the format/crowd/industry/purpose category strings.
    if (!string.IsNullOrEmpty(model.formatCate)
        || !string.IsNullOrEmpty(model.crowdCate)
        || !string.IsNullOrEmpty(model.industryCate)
        || !string.IsNullOrEmpty(model.purposeCate))
    {
        var fields = new[]
        {
            OutDoorIndexFields.IndustryCate,
            OutDoorIndexFields.CrowdCate,
            OutDoorIndexFields.PurposeCate,
            OutDoorIndexFields.FormatName
        };

        var keywords =
            (string.IsNullOrEmpty(model.formatCate) ? string.Empty : model.formatCate + ",")
            + (string.IsNullOrEmpty(model.crowdCate) ? string.Empty : model.crowdCate + ",")
            + (string.IsNullOrEmpty(model.industryCate) ? string.Empty : model.industryCate + ",")
            + (string.IsNullOrEmpty(model.purposeCate) ? string.Empty : model.purposeCate);

        var analyzer = new PanGuAnalyzer();
        var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields, analyzer);

        // Conjunction: every term must match.
        var conjuctionQuery = new BooleanQuery();
        conjuctionQuery.Boost = 2.0f;

        // Disjunction: any term may match.
        var disjunctionQuery = new BooleanQuery();
        disjunctionQuery.Boost = 0.1f;

        // Prefix wildcard (term*) on each field.
        var wildCardQuery = new BooleanQuery();
        wildCardQuery.Boost = 0.5f;

        foreach (var term in GetSearchTerms(keywords))
        {
            var termQuery = queryParser.Parse(term);
            conjuctionQuery.Add(termQuery, Occur.MUST);
            disjunctionQuery.Add(termQuery, Occur.SHOULD);

            foreach (var field in fields)
            {
                var wildCardTermQuery = new WildcardQuery(new Term(field, term + "*"));
                wildCardTermQuery.Boost = 0.7f;
                wildCardQuery.Add(wildCardTermQuery, Occur.SHOULD);
            }
        }

        var keywordsQuery = conjuctionQuery.Combine(new Query[] { conjuctionQuery, disjunctionQuery, wildCardQuery });
        combineQuery.Add(keywordsQuery, Occur.MUST);
    }

    // Price band: from 0 up to the band's maximum.
    if (model.priceCate != 0)
    {
        var rangeValue = EnumHelper.GetPriceValue(model.priceCate);
        // NOTE(review): a maximum above 99999 is replaced by 1000 - confirm this cap is intended.
        if (rangeValue.Max > 99999)
        {
            rangeValue.Max = 1000;
        }
        var PriceQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Price,
            0, Convert.ToDouble(rangeValue.Max), true, true);
        combineQuery.Add(PriceQuery, Occur.MUST);
    }

    using (var directory = new SimpleFSDirectory(new DirectoryInfo(LuceneCommon.IndexOutDoorDirectory)))
    {
        var searcher = new IndexSearcher(directory, readOnly: true);
        try
        {
            var results = searcher.Search(combineQuery, filter: null, n: 30, sort: new Sort(sortField));

            // Materialize the items while the searcher is still open.
            var keys = results.ScoreDocs
                .Select(c => GetMediaItem(searcher.Doc(c.Doc)))
                .ToList();

            totalHits = results.TotalHits;
            return keys;
        }
        finally
        {
            // Dispose the searcher even when the search or document loading throws.
            searcher.Dispose();
        }
    }
}
/// <summary>
/// Creates a new index in <c>indexDirectory</c> using the PanGu analyzer.
/// The writer is opened with <c>create == true</c> and closed again immediately.
/// </summary>
public void IndexCreate()
{
    analyzer = new PanGuAnalyzer();
    IndexWriter writer = new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexDirectory)), analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    // The writer is not kept anywhere, so close it here; leaving it open would keep
    // the index locked against later writers.
    writer.Close();
}
/// <summary>
/// Builds the combined Lucene query for a list search: an optional keyword clause plus
/// one mandatory numeric filter per criterion set on <paramref name="queryParams"/>.
/// </summary>
/// <param name="queryParams">Filter values; a value of 0 (or an empty province / "quanguo") leaves that filter out.</param>
/// <param name="searchFilter">Carries the free-text search term.</param>
/// <returns>A boolean query in which every added clause is required (MUST).</returns>
private static Query ParseQuery(ListSearchItemViewModel queryParams, SearchFilter searchFilter)
{
    var combineQuery = new BooleanQuery();

    // Keyword clause
    if (!String.IsNullOrWhiteSpace(searchFilter.SearchTerm))
    {
        var fields = new[]
        {
            OutDoorIndexFields.Title,
            OutDoorIndexFields.Description,
            OutDoorIndexFields.AreaAtt,
            OutDoorIndexFields.MediaCateName,
            OutDoorIndexFields.CityName,
            OutDoorIndexFields.ProvinceName,
            OutDoorIndexFields.PMediaCateName,
            OutDoorIndexFields.FormatName,
            OutDoorIndexFields.PeriodName,
            OutDoorIndexFields.OwnerCateName
        };

        var analyzer = new PanGuAnalyzer();
        var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields, analyzer);

        // The raw search term is deliberately not run through the query parser:
        // it is unescaped user input, so query-syntax characters in it would raise a
        // ParseException. Only the terms returned by GetSearchTerms are parsed below.

        // Conjunction: every term must match.
        var conjuctionQuery = new BooleanQuery();
        conjuctionQuery.Boost = 2.0f;

        // Disjunction: any term may match.
        var disjunctionQuery = new BooleanQuery();
        disjunctionQuery.Boost = 0.1f;

        // Prefix wildcard (term*) on each field.
        var wildCardQuery = new BooleanQuery();
        wildCardQuery.Boost = 0.5f;

        // Whole-term matches against the title, exact and as a substring.
        var escapedSearchTerm = Escape(searchFilter.SearchTerm);
        var exactIdQuery = new TermQuery(new Term(OutDoorIndexFields.Title, escapedSearchTerm));
        exactIdQuery.Boost = 2.5f;
        var wildCardIdQuery = new WildcardQuery(new Term(OutDoorIndexFields.Title, "*" + escapedSearchTerm + "*"));

        foreach (var term in GetSearchTerms(searchFilter.SearchTerm))
        {
            var termQuery = queryParser.Parse(term);
            conjuctionQuery.Add(termQuery, Occur.MUST);
            disjunctionQuery.Add(termQuery, Occur.SHOULD);

            foreach (var field in fields)
            {
                var wildCardTermQuery = new WildcardQuery(new Term(field, term + "*"));
                wildCardTermQuery.Boost = 0.7f;
                wildCardQuery.Add(wildCardTermQuery, Occur.SHOULD);
            }
        }

        var keywordsQuery = conjuctionQuery.Combine(new Query[] { exactIdQuery, wildCardIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery });
        combineQuery.Add(keywordsQuery, Occur.MUST);
    }

    // Review status: from ShowOnline up to 99.
    var verifyStatus = NumericRangeQuery.NewIntRange(OutDoorIndexFields.Status,
        (int)OutDoorStatus.ShowOnline, 99, true, true);
    combineQuery.Add(verifyStatus, Occur.MUST);

    // Province ("quanguo" is skipped - presumably it means "nationwide"; confirm).
    if (!String.IsNullOrEmpty(queryParams.Province) && queryParams.Province != "quanguo")
    {
        int ProvinceValue = EnumHelper.GetProvinceValue(queryParams.Province);
        var provinceQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.Province,
            ProvinceValue, ProvinceValue, true, true);
        combineQuery.Add(provinceQuery, Occur.MUST);
    }

    // City
    if (queryParams.City != 0)
    {
        var cityQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.City,
            queryParams.City, queryParams.City, true, true);
        combineQuery.Add(cityQuery, Occur.MUST);
    }

    // Authentication status
    if (queryParams.AuthStatus != 0)
    {
        var authStatusQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.AuthStatus,
            queryParams.AuthStatus, queryParams.AuthStatus, true, true);
        combineQuery.Add(authStatusQuery, Occur.MUST);
    }

    // Latitude/longitude bounding box (only applied when MinX is non-zero).
    if (queryParams.MinX != 0)
    {
        var latQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Lat,
            queryParams.MinX, queryParams.MaxX, true, true);
        var lngQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Lng,
            queryParams.MinY, queryParams.MaxY, true, true);
        combineQuery.Add(latQuery, Occur.MUST);
        combineQuery.Add(lngQuery, Occur.MUST);
    }

    // Media category (matched against the PMediaCode field).
    if (queryParams.MediaCode != 0)
    {
        var mediaCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.PMediaCode,
            queryParams.MediaCode, queryParams.MediaCode, true, true);
        combineQuery.Add(mediaCodeQuery, Occur.MUST);
    }

    // Media sub-category (matched against the MediaCode field).
    if (queryParams.ChildMediaCode != 0)
    {
        var ChildMediaCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.MediaCode,
            queryParams.ChildMediaCode, queryParams.ChildMediaCode, true, true);
        combineQuery.Add(ChildMediaCodeQuery, Occur.MUST);
    }

    // Media format
    if (queryParams.FormatCode != 0)
    {
        var FormatCodeCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.FormatCode,
            queryParams.FormatCode, queryParams.FormatCode, true, true);
        combineQuery.Add(FormatCodeCodeQuery, Occur.MUST);
    }

    // Media ownership
    if (queryParams.OwnerCode != 0)
    {
        var OwnerCodeCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.OwnerCode,
            queryParams.OwnerCode, queryParams.OwnerCode, true, true);
        combineQuery.Add(OwnerCodeCodeQuery, Occur.MUST);
    }

    // Purchase period
    if (queryParams.PeriodCode != 0)
    {
        var PeriodCodeCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.PeriodCode,
            queryParams.PeriodCode, queryParams.PeriodCode, true, true);
        combineQuery.Add(PeriodCodeCodeQuery, Occur.MUST);
    }

    // Price band
    if (queryParams.Price != 0)
    {
        var rangeValue = EnumHelper.GetPriceValue(queryParams.Price);
        var PriceQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Price,
            Convert.ToDouble(rangeValue.Min), Convert.ToDouble(rangeValue.Max), true, true);
        combineQuery.Add(PriceQuery, Occur.MUST);
    }

    return combineQuery;
}
/// <summary>
/// Builds the combined Lucene query for a search: an optional keyword clause plus
/// one mandatory filter per criterion set on <paramref name="queryTerm"/>.
/// </summary>
/// <param name="queryTerm">Filter values; a value of 0 (or an empty <c>Dq</c>) leaves that filter out.</param>
/// <param name="searchFilter">Carries the free-text search term.</param>
/// <returns>A boolean query in which every added clause is required (MUST).</returns>
private static Query ParseQuery(QueryTerm queryTerm, SearchFilter searchFilter)
{
    var combineQuery = new BooleanQuery();

    // Keyword clause
    if (!String.IsNullOrWhiteSpace(searchFilter.SearchTerm))
    {
        var fields = new[]
        {
            OutDoorIndexFields.Title,
            OutDoorIndexFields.Description,
            OutDoorIndexFields.AreaCate,
            OutDoorIndexFields.IndustryCate,
            OutDoorIndexFields.CrowdCate,
            OutDoorIndexFields.PurposeCate,
            OutDoorIndexFields.MediaCateName,
            OutDoorIndexFields.CityCateName,
            OutDoorIndexFields.FormatName,
            OutDoorIndexFields.PeriodName,
            OutDoorIndexFields.OwnerName
        };

        var analyzer = new PanGuAnalyzer();
        var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields, analyzer);

        // The raw search term is deliberately not run through the query parser:
        // it is unescaped user input, so query-syntax characters in it would raise a
        // ParseException. Only the terms returned by GetSearchTerms are parsed below.

        // Conjunction: every term must match.
        var conjuctionQuery = new BooleanQuery();
        conjuctionQuery.Boost = 2.0f;

        // Disjunction: any term may match.
        var disjunctionQuery = new BooleanQuery();
        disjunctionQuery.Boost = 0.1f;

        // Prefix wildcard (term*) on each field.
        var wildCardQuery = new BooleanQuery();
        wildCardQuery.Boost = 0.5f;

        // Whole-term matches against the title, exact and as a substring.
        var escapedSearchTerm = Escape(searchFilter.SearchTerm);
        var exactIdQuery = new TermQuery(new Term(OutDoorIndexFields.Title, escapedSearchTerm));
        exactIdQuery.Boost = 2.5f;
        var wildCardIdQuery = new WildcardQuery(new Term(OutDoorIndexFields.Title, "*" + escapedSearchTerm + "*"));

        foreach (var term in GetSearchTerms(searchFilter.SearchTerm))
        {
            var termQuery = queryParser.Parse(term);
            conjuctionQuery.Add(termQuery, Occur.MUST);
            disjunctionQuery.Add(termQuery, Occur.SHOULD);

            foreach (var field in fields)
            {
                var wildCardTermQuery = new WildcardQuery(new Term(field, term + "*"));
                wildCardTermQuery.Boost = 0.7f;
                wildCardQuery.Add(wildCardTermQuery, Occur.SHOULD);
            }
        }

        var keywordsQuery = conjuctionQuery.Combine(new Query[] { exactIdQuery, wildCardIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery });
        combineQuery.Add(keywordsQuery, Occur.MUST);
    }

    // Specific media ID
    if (queryTerm.MediaID != 0)
    {
        var mediaIdQuery = new TermQuery(new Term(OutDoorIndexFields.ID, queryTerm.MediaID.ToString()));
        combineQuery.Add(mediaIdQuery, Occur.MUST);
    }

    // Member status: from CompanyAuth up to 99.
    var memberStatusQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.MemberStatus,
        (int)MemberStatus.CompanyAuth, 99, true, true);
    combineQuery.Add(memberStatusQuery, Occur.MUST);

    // Review status: from ShowOnline up to 99.
    var verifyStatus = NumericRangeQuery.NewIntRange(OutDoorIndexFields.Status,
        (int)OutDoorStatus.ShowOnline, 99, true, true);
    combineQuery.Add(verifyStatus, Occur.MUST);

    // Specific member ID
    if (queryTerm.MemberID != 0)
    {
        var memberIdQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.MemberID,
            queryTerm.MemberID, queryTerm.MemberID, true, true);
        combineQuery.Add(memberIdQuery, Occur.MUST);
    }

    // City: category-code range from CityCateCode to CityMaxCode.
    if (queryTerm.City != 0)
    {
        var cityQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.CityCateCode,
            queryTerm.CityCateCode, queryTerm.CityMaxCode, true, true);
        combineQuery.Add(cityQuery, Occur.MUST);
    }

    // Authentication status
    if (queryTerm.AuthStatus != 0)
    {
        var authStatusQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.AuthStatus,
            queryTerm.AuthStatus, queryTerm.AuthStatus, true, true);
        combineQuery.Add(authStatusQuery, Occur.MUST);
    }

    // Latitude/longitude bounding box (only applied when MinX is non-zero).
    if (queryTerm.MinX != 0)
    {
        var latQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Lat,
            queryTerm.MinX, queryTerm.MaxX, true, true);
        var lngQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Lng,
            queryTerm.MinY, queryTerm.MaxY, true, true);
        combineQuery.Add(latQuery, Occur.MUST);
        combineQuery.Add(lngQuery, Occur.MUST);
    }

    // Media category: category-code range from MediaCateCode to MediaMaxCode.
    if (queryTerm.MediaCode != 0)
    {
        var mediaCodeQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.MediaCateCode,
            queryTerm.MediaCateCode, queryTerm.MediaMaxCode, true, true);
        combineQuery.Add(mediaCodeQuery, Occur.MUST);
    }

    // Media format
    if (queryTerm.FormatCode != 0)
    {
        var FormatCodeCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.FormatCode,
            queryTerm.FormatCode, queryTerm.FormatCode, true, true);
        combineQuery.Add(FormatCodeCodeQuery, Occur.MUST);
    }

    // Media ownership (filter currently disabled)
    //if (queryTerm.OwnerCode != 0)
    //{
    //    var OwnerCodeCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.OwnerCode,
    //        queryTerm.OwnerCode, queryTerm.OwnerCode, true, true);
    //    combineQuery.Add(OwnerCodeCodeQuery, Occur.MUST);
    //}

    // Purchase period
    if (queryTerm.PeriodCode != 0)
    {
        var PeriodCodeCodeQuery = NumericRangeQuery.NewIntRange(OutDoorIndexFields.PeriodCode,
            queryTerm.PeriodCode, queryTerm.PeriodCode, true, true);
        combineQuery.Add(PeriodCodeCodeQuery, Occur.MUST);
    }

    // Price band
    if (queryTerm.Price != 0)
    {
        var rangeValue = EnumHelper.GetPriceValue(queryTerm.Price);
        var PriceQuery = NumericRangeQuery.NewDoubleRange(OutDoorIndexFields.Price,
            Convert.ToDouble(rangeValue.Min), Convert.ToDouble(rangeValue.Max), true, true);
        combineQuery.Add(PriceQuery, Occur.MUST);
    }

    // Media schedule by month: deadline between ten years ago and the 1st of the
    // given month in the current year.
    // NOTE(review): DeadLine is used as a month number; a value outside 1-12 makes
    // the DateTime constructor throw - confirm callers validate it.
    if (queryTerm.DeadLine != 0)
    {
        var minValue = (DateTime.Now.AddYears(-10)).Ticks;
        var maxValue = (new DateTime(DateTime.Now.Year, queryTerm.DeadLine, 1)).Ticks;
        var DeadLineQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.DeadLine,
            Convert.ToInt64(minValue), Convert.ToInt64(maxValue), true, true);
        combineQuery.Add(DeadLineQuery, Occur.MUST);
    }

    // Media schedule by date: deadline between ten years ago and the given date.
    if (!string.IsNullOrEmpty(queryTerm.Dq))
    {
        var minValue = (DateTime.Now.AddYears(-10)).Ticks;
        var maxValue = Convert.ToDateTime(queryTerm.Dq).Ticks;
        var DqQuery = NumericRangeQuery.NewLongRange(OutDoorIndexFields.DeadLine,
            Convert.ToInt64(minValue), Convert.ToInt64(maxValue), true, true);
        combineQuery.Add(DqQuery, Occur.MUST);
    }

    return combineQuery;
}
/// <summary>
/// Breaks a search term into the individual terms used to build queries.
/// </summary>
/// <param name="searchTerm">The raw search text.</param>
/// <returns>
/// The PanGu tokens of <paramref name="searchTerm"/> (split again on spaces), followed by
/// the whole search term itself, de-duplicated case-insensitively, each passed through
/// <c>Escape</c>. The sequence is lazily evaluated.
/// </returns>
private static IEnumerable<string> GetSearchTerms(string searchTerm)
{
    List<string> result = new List<string>();
    var analyzer = new PanGuAnalyzer();
    StringReader sr = new StringReader(searchTerm);
    try
    {
        TokenStream stream = analyzer.TokenStream(null, sr);
        while (stream.IncrementToken())
        {
            ITermAttribute ita = stream.GetAttribute<ITermAttribute>();
            result.Add(ita.Term);
        }
    }
    finally
    {
        // Release the reader and analyzer even when tokenization throws.
        sr.Close();
        analyzer.Dispose();
    }

    var resultString = string.Join(" ", result);

    return resultString.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .Concat(new[] { searchTerm })
        .Distinct(StringComparer.OrdinalIgnoreCase)
        .Select(Escape);
}
/// <summary>
/// Makes sure the outdoor index folder exists and assigns a fresh PanGu-backed
/// <c>IndexWriter</c> over it to <c>_indexWriter</c>.
/// </summary>
/// <param name="creatingIndex">Passed to the writer as its <c>create</c> flag.</param>
private static void EnsureIndexWriterCore(bool creatingIndex)
{
    bool indexFolderMissing = !Directory.Exists(LuceneCommon.IndexOutDoorDirectory);
    if (indexFolderMissing)
    {
        Directory.CreateDirectory(LuceneCommon.IndexOutDoorDirectory);
    }

    var panGu = new PanGuAnalyzer();
    var luceneDirectory = new SimpleFSDirectory(new DirectoryInfo(LuceneCommon.IndexOutDoorDirectory));

    _indexWriter = new IndexWriter(
        luceneDirectory,
        panGu,
        create: creatingIndex,
        mfl: IndexWriter.MaxFieldLength.UNLIMITED);
}
/// <summary>
/// Builds the keyword query for <paramref name="searchFilter"/>.
/// </summary>
/// <param name="searchFilter">Carries the free-text search term and the sort property.</param>
/// <returns>
/// A match-all query when the search term is blank; otherwise the combined keyword query,
/// wrapped in a score query on the "Hit" field when sorting by <c>SortProperty.Hit</c>.
/// </returns>
private static Query ParseQuery(SearchFilter searchFilter)
{
    if (String.IsNullOrWhiteSpace(searchFilter.SearchTerm))
    {
        return new MatchAllDocsQuery();
    }

    var fields = new[]
    {
        "Title", "Description", "CompanyName", "AreaAtt", "MediaCateName", "CityName",
        "ProvinceName", "PMediaCateName", "FormatName", "PeriodName", "OwnerCateName"
    };

    var analyzer = new PanGuAnalyzer();
    var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields, analyzer);

    // The raw search term is deliberately not run through the query parser:
    // it is unescaped user input, so query-syntax characters in it would raise a
    // ParseException. Only the terms returned by GetSearchTerms are parsed below.

    // All terms in the multi-term query appear in at least one of the fields.
    var conjuctionQuery = new BooleanQuery();
    conjuctionQuery.Boost = 2.0f;

    // Some terms in the multi-term query appear in at least one of the fields.
    var disjunctionQuery = new BooleanQuery();
    disjunctionQuery.Boost = 0.1f;

    // Suffix wildcard search e.g. jquer*
    var wildCardQuery = new BooleanQuery();
    wildCardQuery.Boost = 0.5f;

    // Escape the entire term we use for exact searches.
    var escapedSearchTerm = Escape(searchFilter.SearchTerm);
    var exactIdQuery = new TermQuery(new Term("Title", escapedSearchTerm));
    exactIdQuery.Boost = 2.5f;
    var wildCardIdQuery = new WildcardQuery(new Term("Title", "*" + escapedSearchTerm + "*"));

    foreach (var term in GetSearchTerms(searchFilter.SearchTerm))
    {
        var termQuery = queryParser.Parse(term);
        conjuctionQuery.Add(termQuery, Occur.MUST);
        disjunctionQuery.Add(termQuery, Occur.SHOULD);

        foreach (var field in fields)
        {
            var wildCardTermQuery = new WildcardQuery(new Term(field, term + "*"));
            wildCardTermQuery.Boost = 0.7f;
            wildCardQuery.Add(wildCardTermQuery, Occur.SHOULD);
        }
    }

    var combinedQuery = conjuctionQuery.Combine(new Query[] { exactIdQuery, wildCardIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery });

    if (searchFilter.SortProperty == SortProperty.Hit)
    {
        // When sorting by hits, boost scores using the integer "Hit" field.
        var downloadCountBooster = new FieldScoreQuery("Hit", FieldScoreQuery.Type.INT);
        return new CustomScoreQuery(combinedQuery, downloadCountBooster);
    }

    return combinedQuery;
}
/// <summary>
/// Splits <paramref name="content"/> into words using the PanGu analyzer.
/// </summary>
/// <param name="content">Text to tokenize.</param>
/// <returns>The terms in the order the token stream produced them.</returns>
private static string[] SplitWords(string content)
{
    List<string> strList = new List<string>();

    // Dispose the analyzer and the stream once tokenization finishes or throws.
    using (PanGuAnalyzer analyzer = new PanGuAnalyzer())
    using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content)))
    {
        while (tokenStream.IncrementToken())
        {
            var ita = tokenStream.GetAttribute<ITermAttribute>();
            strList.Add(ita.Term);
        }
    }

    return strList.ToArray();
}
/// <summary>
/// PanGu word segmentation: splits a string into its terms.
/// </summary>
/// <param name="str">Text to tokenize. <c>null</c> or empty text yields an empty array.</param>
/// <returns>The term text of every token, in the order the token stream produced them.</returns>
public static string[] PanGuSplit(string str)
{
    List<string> list = new List<string>();
    if (string.IsNullOrEmpty(str))
    {
        // StringReader rejects null, and there is nothing to tokenize either way.
        return list.ToArray();
    }

    // Use the PanGu word-segmentation analyzer.
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = tokenStream.Next()) != null)
        {
            list.Add(token.TermText());
        }
    }
    finally
    {
        // Close the stream even when tokenization throws.
        tokenStream.Close();
    }

    return list.ToArray();
}