/// <summary>
/// Verifies that <c>CreatePhraseQuery</c> on a <see cref="QueryBuilder"/> using
/// <c>MockSynonymAnalyzer</c> turns "old dogs" into a <see cref="MultiPhraseQuery"/>
/// with "old" in the first slot and the alternatives "dogs"/"dog" in the second.
/// </summary>
public virtual void TestSynonymsPhrase()
{
    // Alternatives expected at the second phrase position.
    Term[] secondPosition = { new Term("field", "dogs"), new Term("field", "dog") };

    var expected = new MultiPhraseQuery();
    expected.Add(new Term("field", "old"));
    expected.Add(secondPosition);

    var builder = new QueryBuilder(new MockSynonymAnalyzer());
    var actual = builder.CreatePhraseQuery("field", "old dogs");

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Console demo: adds one sample document to the on-disk "Index" directory and prints
/// every hit for the phrase "brown fox" in the <c>favoritePhrase</c> field.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
private static void Main(string[] args)
{
    // Ensures index backwards compatibility
    var luceneVersion = LuceneVersion.LUCENE_48;
    var indexLocation = @"Index";

    // Directory, analyzer, writer and reader are all disposable; scoping them with
    // `using` releases them even when indexing or searching throws.
    using (var dir = FSDirectory.Open(indexLocation))
    using (var analyzer = new StandardAnalyzer(luceneVersion))
    using (var writer = new IndexWriter(dir, new IndexWriterConfig(luceneVersion, analyzer)))
    {
        var source = new
        {
            Name = "Kermit the Frog",
            FavoritePhrase = "The quick brown fox jumps over the lazy dog"
        };

        Document doc = new Document
        {
            // StringField indexes but doesn't tokenize
            new StringField("name", source.Name, Field.Store.YES),
            new TextField("favoritePhrase", source.FavoritePhrase, Field.Store.YES)
        };

        writer.AddDocument(doc);
        writer.Flush(triggerMerge: false, applyAllDeletes: false);

        // search with a phrase
        var phrase = new MultiPhraseQuery();
        phrase.Add(new Term("favoritePhrase", "brown"));
        phrase.Add(new Term("favoritePhrase", "fox"));

        // re-use the writer to get real-time updates
        using (var reader = writer.GetReader(applyAllDeletes: true))
        {
            var searcher = new IndexSearcher(reader);
            var hits = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;

            for (var i = 0; i < hits.Length; i++)
            {
                var foundDoc = searcher.Doc(hits[i].Doc);
                float score = hits[i].Score;

                // Print the 1-based position of the hit (it used to be a constant 1).
                Console.WriteLine("--结果 num {0}, 耗时 {1}", i + 1, score);
                Console.WriteLine("--ID: {0}", foundDoc.Get("name"));
                Console.WriteLine("--Text found: {0}" + Environment.NewLine, foundDoc.Get("favoritePhrase"));
            }
        }
    }
}
/// <summary>
/// Verifies that <c>CreatePhraseQuery</c> on a <see cref="QueryBuilder"/> using
/// <c>MockCJKSynonymAnalyzer</c> turns "中国" into a <see cref="MultiPhraseQuery"/> of
/// "中" followed by "国"/"國", both without and with an explicit slop.
/// </summary>
public virtual void TestCJKSynonymsPhrase()
{
    var builder = new QueryBuilder(new MockCJKSynonymAnalyzer());

    var expected = new MultiPhraseQuery();
    expected.Add(new Term("field", "中"));
    expected.Add(new[] { new Term("field", "国"), new Term("field", "國") });

    // No slop requested.
    Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "中国"));

    // The requested slop must show up on the generated query.
    expected.Slop = 3;
    Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "中国", 3));
}
/// <summary>
/// Verifies that <c>FieldQuery.flatten</c> breaks a two-position
/// <see cref="MultiPhraseQuery"/> into the four queries expected for a1, a2, b1 and b2.
/// </summary>
public void TestFlattenMultiPhraseQuery()
{
    Term[] firstPosition = { new Term(F, "a1"), new Term(F, "a2") };
    Term[] secondPosition = { new Term(F, "b1"), new Term(F, "b2") };

    var query = new MultiPhraseQuery();
    query.Add(firstPosition);
    query.Add(secondPosition);

    var flattened = new HashSet<Query>();
    new FieldQuery(query, true, true).flatten(query, flattened);

    AssertCollectionQueries(flattened, Tq("a1"), Tq("a2"), Tq("b1"), Tq("b2"));
}
/// <summary>
/// Verifies that the classic <see cref="QueryParser"/> (AND default operator,
/// <c>MockCJKSynonymAnalyzer</c>) parses the quoted phrase "中国" into a
/// <see cref="MultiPhraseQuery"/>, and that the boost (^2) and slop (~3) suffixes
/// are applied to it.
/// </summary>
public virtual void TestCJKSynonymsPhrase()
{
    // Alternatives expected at the second phrase position.
    Index.Term[] secondPosition = { new Index.Term("field", "国"), new Index.Term("field", "國") };

    var expected = new MultiPhraseQuery();
    expected.Add(new Index.Term("field", "中"));
    expected.Add(secondPosition);

    var qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
    qp.DefaultOperator = Operator.AND;

    // Plain phrase.
    assertEquals(expected, qp.Parse("\"中国\""));

    // Phrase with boost.
    expected.Boost = 2.0f;
    assertEquals(expected, qp.Parse("\"中国\"^2"));

    // Phrase with slop and boost.
    expected.Slop = 3;
    assertEquals(expected, qp.Parse("\"中国\"~3^2"));
}
/// <summary>
/// Searches the ".luceneindex" index under <c>options.DataDir</c> for documents whose
/// <c>contents</c> field contains <c>options.SearchTerm</c> as a single term.
/// </summary>
/// <param name="options">Supplies the data directory, the search term and the maximum hit count.</param>
/// <returns>One <see cref="IndexFile"/> (score, path, modified) per hit.</returns>
public override IList<IndexFile> Search(SearchingOptions options)
{
    var phrase = new MultiPhraseQuery();
    phrase.Add(new Term("contents", options.SearchTerm));

    var indexDir = Path.Combine(options.DataDir, ".luceneindex");

    // Searching only needs a reader. The previous version opened a DirectoryReader it
    // never used or disposed, plus an IndexWriter whose near-real-time reader leaked.
    // A single disposable reader gives the same view of the last commit without
    // taking the index write lock.
    using (var dir = FSDirectory.Open(indexDir))
    using (var reader = DirectoryReader.Open(dir))
    {
        var searcher = new IndexSearcher(reader);
        var hits = searcher.Search(phrase, options.DocCount).ScoreDocs;

        // Materialize before the reader is disposed: the projection reads stored fields.
        var result = new List<IndexFile>(hits.Length);
        foreach (var hit in hits)
        {
            var doc = searcher.Doc(hit.Doc);
            result.Add(new IndexFile
            {
                Score = hit.Score,
                Path = doc.Get("path"),
                Modified = doc.Get("modified")
            });
        }

        return result;
    }
}
/// <summary>
/// Runs the keyword from a JSON-encoded <c>SearchRequest</c> against the <c>text</c>
/// field and returns the hits as a JSON array of <c>SearchArtTextRequest</c>.
/// When the <c>LUCENE</c> symbol is not defined the method always returns "[]".
/// </summary>
/// <param name="json">Serialized <c>SearchRequest</c>.</param>
/// <returns>JSON array with the art id and highlighted text of up to 20 hits.</returns>
public string FetchResults(string json)
{
#if LUCENE
    var searchRequest = JsonConvert.DeserializeObject<SearchRequest>(json);

    // A missing request or keyword cannot be turned into a Term; report "no results".
    var keyword = searchRequest?.keyword;
    if (keyword == null)
    {
        return "[]";
    }

    List<SearchArtTextRequest> resultList = new List<SearchArtTextRequest>();

    // search with a phrase
    var phrase = new MultiPhraseQuery();
    phrase.Add(new Term("text", keyword));

    int hitCount;

    // re-use the writer to get real-time updates; the reader is disposed after use
    // so repeated calls do not accumulate open readers.
    using (var reader = writer.GetReader(applyAllDeletes: true))
    {
        var searcher = new IndexSearcher(reader);
        var hits = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;
        hitCount = hits.Length;

        foreach (var hit in hits)
        {
            var foundDoc = searcher.Doc(hit.Doc);
            resultList.Add(new SearchArtTextRequest
            {
                ArtId = foundDoc.Get("artId"),
                Text = HighlightText(keyword, foundDoc.Get("text"))
            });
        }
    }

    // Serialize once and reuse the text for both the diagnostics and the return value.
    var serialized = JsonConvert.SerializeObject(resultList);

    Console.Error.WriteLine("---");
    Console.Error.WriteLine(hitCount);
    Console.Error.WriteLine("---");
    Console.Error.WriteLine(json);
    Console.Error.WriteLine("---");
    Console.Error.WriteLine(serialized);
    Console.Error.WriteLine("---");

    return serialized;
#else
    // Search support is compiled out.
    return "[]";
#endif
}
/// <summary>
/// Lazily searches the terms field for the given terms as a phrase and yields, for each
/// hit, the stored term text together with the deserialized data payload.
/// </summary>
/// <param name="searchTerms">Terms added, in order, to the phrase query.</param>
/// <param name="maxHits">Maximum number of hits to fetch.</param>
/// <returns>A deferred sequence of (term, data) pairs; nothing runs until it is enumerated.</returns>
public IEnumerable<(string Term, T Data)> Search(string[] searchTerms, int maxHits = 100)
{
    if (!_reader.IsCurrent())
    {
        // OpenIfChanged returns null when there is nothing newer to open. Only swap
        // (and dispose the old reader) when a new reader was actually obtained, so
        // _reader can never end up null or disposed.
        var refreshed = DirectoryReader.OpenIfChanged(_reader);
        if (refreshed != null)
        {
            var stale = _reader;
            _reader = refreshed;
            _searcher = new IndexSearcher(refreshed);
            stale.Dispose();
        }
    }

    MultiPhraseQuery query = new MultiPhraseQuery();
    foreach (string searchTerm in searchTerms)
    {
        query.Add(new Term(TermsFieldName, searchTerm));
    }

    TopDocs hits = _searcher.Search(query, maxHits);
    foreach (ScoreDoc topDoc in hits.ScoreDocs)
    {
        Document document = _reader.Document(topDoc.Doc);

        string serializedData = document.GetField(DataFieldName).GetStringValue();
        T t = _serializer.Deserialize<T>(serializedData);
        string term = document.GetField(TermsFieldName).GetStringValue();

        yield return (term, t);
    }
}
/// <summary>
/// Verifies that the classic <see cref="QueryParser"/> with <c>MockSynonymAnalyzer</c>
/// parses the quoted phrase "old dogs" into a <see cref="MultiPhraseQuery"/> of "old"
/// followed by "dogs"/"dog" — under the initial and the AND default operator — and
/// that the boost (^2) and slop (~3) suffixes are applied to it.
/// </summary>
public void TestSynonymsPhrase()
{
    // Alternatives expected at the second phrase position.
    Index.Term[] secondPosition = { new Index.Term("field", "dogs"), new Index.Term("field", "dog") };

    var expected = new MultiPhraseQuery();
    expected.Add(new Index.Term("field", "old"));
    expected.Add(secondPosition);

    var qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer());

    // Parser as constructed.
    assertEquals(expected, qp.Parse("\"old dogs\""));

    // Same phrase once the default operator is AND.
    qp.DefaultOperator = QueryParserBase.Operator.AND;
    assertEquals(expected, qp.Parse("\"old dogs\""));

    // Phrase with boost.
    expected.Boost = 2.0f;
    assertEquals(expected, qp.Parse("\"old dogs\"^2"));

    // Phrase with slop and boost.
    expected.Slop = 3;
    assertEquals(expected, qp.Parse("\"old dogs\"~3^2"));
}
/// <summary>
/// Searches the <c>data</c> field for the space-separated words of
/// <paramref name="search"/> as a phrase, using a near-real-time reader from the writer.
/// </summary>
/// <param name="search">Space-separated search words; null or empty yields no results.</param>
/// <param name="take">Maximum number of hits to fetch.</param>
/// <returns>Hits ordered by descending score, de-duplicated by <c>Id</c>.</returns>
public IEnumerable<LuceneDto> Search(string search, int take = 20)
{
    var lucene = new List<LuceneDto>();
    if (string.IsNullOrEmpty(search))
    {
        return lucene;
    }

    // Drop empty entries: repeated, leading or trailing spaces would otherwise add
    // empty terms to the phrase.
    var words = search.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);
    if (words.Length == 0)
    {
        return lucene;
    }

    var phrase = new MultiPhraseQuery();
    foreach (var word in words)
    {
        phrase.Add(new Term("data", word));
    }

    // The near-real-time reader is disposable; release it once the stored fields
    // have been copied into DTOs.
    using (var reader = _writer.GetReader(applyAllDeletes: true))
    {
        var searcher = new IndexSearcher(reader);
        var hits = searcher.Search(phrase, take).ScoreDocs;
        foreach (var hit in hits)
        {
            var foundDoc = searcher.Doc(hit.Doc);
            lucene.Add(new LuceneDto
            {
                Score = hit.Score,
                IdInternal = foundDoc.Get("id"),
                Name = foundDoc.Get("name"),
                Data = foundDoc.Get("data")
            });
        }
    }

    return lucene.OrderByDescending(x => x.Score).DistinctBy(x => x.Id);
}
/// <summary>
/// Lazily yields matches for <paramref name="searchTerm"/> in the <c>data</c> field:
/// first the phrase hits for its space-separated words, then — only when there are
/// 10 or fewer phrase hits — the hits of a fuzzy query on the whole search term.
/// </summary>
/// <param name="searchTerm">Text to look for; null or empty yields nothing.</param>
/// <param name="maxDifference">
/// Currently unused: the fuzzy query always allows 2 edits.
/// NOTE(review): confirm whether callers expect this value to drive the edit distance.
/// </param>
/// <returns>A deferred sequence of converted hits (phrase hits first, fuzzy hits after).</returns>
public IEnumerable<string> FindMatches(string searchTerm, int maxDifference)
{
    if (String.IsNullOrEmpty(searchTerm))
    {
        yield break;
    }

    // Both the directory and the reader are disposable. Inside an iterator the
    // `using` blocks are released when enumeration finishes or the enumerator is
    // disposed, so they no longer leak on every call.
    using (var dir = FSDirectory.Open(IndexLocation))
    using (var reader = DirectoryReader.Open(dir))
    {
        var searcher = new IndexSearcher(reader);

        // Drop empty entries so repeated spaces do not add empty terms to the phrase.
        var directPhrase = new MultiPhraseQuery();
        foreach (var item in searchTerm.Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries))
        {
            directPhrase.Add(new Term("data", item));
        }

        var directHits = searcher.Search(directPhrase, 20 /* top 20 */).ScoreDocs;
        foreach (var hit in directHits)
        {
            yield return ConvertHits(searcher, hit);
        }

        // don't do expensive fuzzyQuery if results are enough
        if (directHits.Length > 10)
        {
            yield break;
        }

        var fuzzy = new FuzzyQuery(new Term("data", searchTerm), 2);
        var fuzzyHits = searcher.Search(fuzzy, 20 /* top 20 */).ScoreDocs;
        foreach (var hit in fuzzyHits)
        {
            yield return ConvertHits(searcher, hit);
        }
    }
}
/// <summary>
/// Looks up the whole query text as a single term of the <c>name</c> field and returns
/// the formatted result of each of the top 10 hits.
/// </summary>
/// <param name="query">Full title to look for.</param>
/// <param name="searcher">Searcher to run the query on.</param>
/// <returns>One formatted entry per hit, in hit order.</returns>
private static List<string> searchFull_Lucene(string query, IndexSearcher searcher)
{
    // Look for the full title.
    var fullTitle = new MultiPhraseQuery();
    fullTitle.Add(new Term("name", query));

    var topHits = searcher.Search(fullTitle, 10).ScoreDocs;

    List<string> found = new List<string>();
    for (int i = 0; i < topHits.Length; i++)
    {
        found.Add(getFoundDocResult(searcher.Doc(topHits[i].Doc)));
    }

    return found;
}
/// <summary>
/// Verifies that a quoted phrase analyzed by a <c>CannedAnalyzer</c> producing stacked
/// tokens is parsed into a <see cref="MultiPhraseQuery"/> whose term groups sit at
/// positions -1, 0 and 1.
/// </summary>
public virtual void TestMultiPhraseQueryParsing()
{
    // Canned token stream handed to the analyzer; the parsed text itself is irrelevant.
    TokenAndPos[] cannedTokens =
    {
        new TokenAndPos("a", 0),
        new TokenAndPos("1", 0),
        new TokenAndPos("b", 1),
        new TokenAndPos("1", 1),
        new TokenAndPos("c", 2)
    };

    var qp = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(cannedTokens));
    Query q = qp.Parse("\"this text is acually ignored\"");
    assertTrue("wrong query type!", q is MultiPhraseQuery);

    var expected = new MultiPhraseQuery();
    expected.Add(new[] { new Term("field", "a"), new Term("field", "1") }, -1);
    expected.Add(new[] { new Term("field", "b"), new Term("field", "1") }, 0);
    expected.Add(new[] { new Term("field", "c") }, 1);

    assertEquals(expected, q);
}
/// <summary>
/// Runs <paramref name="text"/> through <paramref name="analyzer"/> and builds the
/// simplest query that represents the resulting tokens: a <see cref="TermQuery"/> for
/// exactly one term, a <see cref="PhraseQuery"/> when every position holds one term,
/// otherwise a <see cref="MultiPhraseQuery"/>.
/// </summary>
/// <param name="analyzer">Analyzer used to tokenize the text.</param>
/// <param name="field">Field the terms are created for.</param>
/// <param name="text">Text to analyze.</param>
/// <returns>The query built from the analyzed tokens.</returns>
static Query ExecuteAnalyzer(Analyzer analyzer, string field, string text)
{
    var termsByPosition = new List<List<Term>>();
    List<Term> currentPosition = null;

    using var reader = new StringReader(text);
    TokenStream tokenStream = analyzer.TokenStream(field, reader);
    ITermAttribute termAttribute = tokenStream.AddAttribute<ITermAttribute>();
    IPositionIncrementAttribute positionIncrementAttribute = tokenStream.AddAttribute<IPositionIncrementAttribute>();

    while (tokenStream.IncrementToken())
    {
        // A positive increment opens a new position; an increment of 0 stacks the
        // token on the current one.
        if (positionIncrementAttribute.PositionIncrement > 0)
        {
            currentPosition = new List<Term>();
            termsByPosition.Add(currentPosition);
        }

        // Tokens seen before any position has been opened are dropped.
        currentPosition?.Add(new Term(field, termAttribute.Term));
    }

    bool isSingleTerm = termsByPosition.Count == 1 && termsByPosition[0].Count == 1;
    if (isSingleTerm)
    {
        return new TermQuery(termsByPosition[0][0]);
    }

    // Every position list holds at least one term, so "total terms == positions"
    // is the same as "each position has exactly one term".
    if (termsByPosition.All(position => position.Count == 1))
    {
        PhraseQuery phraseQuery = new PhraseQuery();
        foreach (List<Term> position in termsByPosition)
        {
            phraseQuery.Add(position[0]);
        }

        return phraseQuery;
    }

    MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
    foreach (List<Term> position in termsByPosition)
    {
        multiPhraseQuery.Add(position.ToArray());
    }

    return multiPhraseQuery;
}
/// <summary>
/// Adds a clause for <paramref name="term"/> on the field named by
/// <paramref name="selector"/>. A term without spaces becomes a
/// <c>PermutermWildcardQuery</c> added with <paramref name="occur"/>; a term with spaces
/// becomes a <see cref="MultiPhraseQuery"/> (slop 8) added as MUST inside a group
/// opened with <paramref name="occur"/>.
/// </summary>
/// <param name="term">Text to search for.</param>
/// <param name="selector">Expression whose name identifies the field.</param>
/// <param name="occur">Occurrence used for the single-word clause or for the opened group.</param>
/// <returns>This provider, for chaining.</returns>
private QueryProvider<TLuceneEntity> TokenizedIncludePhrase<TResult>(string term, Expression<Func<TLuceneEntity, TResult>> selector, Occur occur)
{
    if (!term.Contains(" "))
    {
        var singleWord = new PermutermWildcardQuery(new Term(selector.GetName(), $"{term}"));
        _current.Add(singleWord, occur);
    }
    else
    {
        this.Open(occur);

        string[] words = RemoveEmpty(term.Split(' '));

        // NOTE(review): these wildcard queries are built but never added to any
        // query; kept so the rewrite behaves exactly like the original.
        var wildCards = new List<PermutermWildcardQuery>();
        foreach (string word in words)
        {
            wildCards.Add(new PermutermWildcardQuery(new Term(selector.GetName(), $"{word}")));
        }

        var multiPhrase = new MultiPhraseQuery();
        foreach (string word in words)
        {
            multiPhrase.Add(new Term(selector.GetName(), word));
        }

        multiPhrase.Slop = 8;
        _current.Add(multiPhrase, Occur.MUST);

        this.Close();
    }

    _containsWildCard = true;
    return this;
}
/// <summary>
/// Searches the index stored under the web root for documents whose <c>content</c>
/// field contains <paramref name="query"/> as a single term.
/// </summary>
/// <param name="hostEnvironment">Supplies the web root path that contains the index folder.</param>
/// <param name="query">Term to look for.</param>
/// <returns>Title and URL of up to 20 hits.</returns>
public static List<ResultModel> Search(IWebHostEnvironment hostEnvironment, string query)
{
    var luceneVersion = LuceneVersion.LUCENE_48;

    // NOTE(review): the separator is hard-coded as a backslash; left unchanged so the
    // location stays in sync with whatever code builds the index.
    var indexLocation = hostEnvironment.WebRootPath + "\\Index";

    var phrase = new MultiPhraseQuery();
    phrase.Add(new Term("content", query));

    var resultList = new List<ResultModel>();

    // Everything opened here is disposable. `using` releases the directory, analyzer,
    // writer and reader even when the search throws; previously only the writer was
    // disposed, and only on the success path.
    using (var dir = FSDirectory.Open(indexLocation))
    using (var analyzer = new StandardAnalyzer(luceneVersion))
    using (var writer = new IndexWriter(dir, new IndexWriterConfig(luceneVersion, analyzer)))
    // re-use the writer to get real-time updates
    using (var reader = writer.GetReader(applyAllDeletes: true))
    {
        var searcher = new IndexSearcher(reader);
        var hits = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;

        foreach (var hit in hits)
        {
            var foundDoc = searcher.Doc(hit.Doc);
            resultList.Add(new ResultModel()
            {
                Title = foundDoc.Get("title"),
                Url = foundDoc.Get("url")
            });
        }
    }

    return resultList;
}
/// <summary>
/// Builds a <see cref="MultiPhraseQuery"/> from a <c>MultiPhraseQueryNode</c>: the terms
/// of the child field nodes are grouped by position increment and each group is added
/// at that position, in ascending position order.
/// </summary>
/// <param name="queryNode">Node to convert; must be a <c>MultiPhraseQueryNode</c>.</param>
/// <returns>The phrase query; it has no terms when the node has no children.</returns>
public virtual Query Build(IQueryNode queryNode)
{
    MultiPhraseQueryNode phraseNode = (MultiPhraseQueryNode)queryNode;
    MultiPhraseQuery phraseQuery = new MultiPhraseQuery();

    IList<IQueryNode> children = phraseNode.GetChildren();
    if (children == null)
    {
        return phraseQuery;
    }

    // Sorted so the groups are added in ascending position order.
    IDictionary<int?, List<Term>> termsByPosition = new SortedDictionary<int?, List<Term>>();

    foreach (IQueryNode child in children)
    {
        FieldQueryNode termNode = (FieldQueryNode)child;

        // The term query for this child is stored on the node under the builder tag.
        TermQuery termQuery = (TermQuery)termNode.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);

        List<Term> bucket;
        bool known = termsByPosition.TryGetValue(termNode.PositionIncrement, out bucket);
        if (!known || bucket == null)
        {
            bucket = new List<Term>();
            termsByPosition[termNode.PositionIncrement] = bucket;
        }

        bucket.Add(termQuery.Term);
    }

    foreach (KeyValuePair<int?, List<Term>> entry in termsByPosition)
    {
        phraseQuery.Add(entry.Value.ToArray(), (int)entry.Key);
    }

    return phraseQuery;
}
/// <summary>
/// Searches the <c>name_word</c> field once per non-empty, space-separated word of
/// <paramref name="query"/> and returns the formatted results of the top 10 hits of
/// each word, in word order.
/// </summary>
/// <param name="query">Space-separated words to look for.</param>
/// <param name="searcher">Searcher to run the queries on.</param>
/// <returns>Formatted entries; the same document may appear once per matching word.</returns>
private static List<string> searchOneWord_Lucene(string query, IndexSearcher searcher)
{
    // Search word by word.
    List<string> found = new List<string>();

    string[] words = query.Split(' ');
    for (int i = 0; i < words.Length; i++)
    {
        string word = words[i];
        if (String.IsNullOrEmpty(word))
        {
            continue;
        }

        // A fresh single-term query per word.
        var singleWord = new MultiPhraseQuery();
        singleWord.Add(new Term("name_word", word));

        foreach (var hit in searcher.Search(singleWord, 10).ScoreDocs)
        {
            found.Add(getFoundDocResult(searcher.Doc(hit.Doc)));
        }
    }

    return found;
}
/// <summary>
/// Searches the index in <paramref name="directoryInfo"/> for
/// <paramref name="textSearcher"/> in the <c>FullRequest</c> field — first as a sloppy
/// phrase (slop 3), then through the query parser — and appends the distinct,
/// non-empty <c>ObjetcId</c> values of the hits to <paramref name="lReturn"/>.
/// </summary>
/// <param name="directoryInfo">Folder that holds the index.</param>
/// <param name="textSearcher">Text to search for (only read here).</param>
/// <param name="lReturn">List that receives the ids found.</param>
private void GetDataIndexId(DirectoryInfo directoryInfo, ref string textSearcher, ref List<string> lReturn)
{
    using (Directory directory = FSDirectory.Open(directoryInfo))
    using (Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30))
    using (IndexReader indexReader = IndexReader.Open(directory, true))
    using (Searcher indexSearcher = new IndexSearcher(indexReader))
    {
        List<string> listTemp = new List<string>();
        char[] delimiterChars = { ' ', ',', '.', ':', '\t' };

        // Phrase search with slop 3. Empty entries are dropped: consecutive delimiters
        // (e.g. ", ") used to add empty terms to the phrase.
        MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
        multiPhraseQuery.Slop = 3;
        string[] words = textSearcher.Split(delimiterChars, System.StringSplitOptions.RemoveEmptyEntries);
        foreach (var word in words)
        {
            multiPhraseQuery.Add(new Term("FullRequest", word));
        }

        if (words.Length > 0)
        {
            CollectObjectIds(indexSearcher, multiPhraseQuery, listTemp);
        }

        // Query-parser search over the same field.
        var queryParser = new QueryParser(Version.LUCENE_30, "FullRequest", analyzer);
        CollectObjectIds(indexSearcher, queryParser.Parse(textSearcher), listTemp);

        lReturn.AddRange(listTemp.Distinct());
    }
}

/// <summary>
/// Runs <paramref name="query"/> (top 100 hits) and appends each hit's non-empty
/// <c>ObjetcId</c> stored value to <paramref name="objectIds"/>.
/// </summary>
private static void CollectObjectIds(Searcher searcher, Query query, List<string> objectIds)
{
    TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);
    searcher.Search(query, collector);

    foreach (var scoreDoc in collector.TopDocs().ScoreDocs)
    {
        string objectId = searcher.Doc(scoreDoc.Doc).Get("ObjetcId");
        if (!string.IsNullOrEmpty(objectId))
        {
            objectIds.Add(objectId);
        }
    }
}
/// <summary>
/// Click handler: searches the index for the lower-cased text of <c>search_field</c> in
/// four passes (parsed query, whole text as one term, per-word wildcard, and — when the
/// text contains a year — per-word wildcard combined with that year) and lists every
/// distinct document (by <c>id</c>) in the results grid, in the order first found.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void lucene_serach_Click(object sender, EventArgs e)
{
    results.Rows.Clear();

    var query = search_field.Text.ToLower();
    var array = query.Split(' ').ToList();

    var totalResults = new List<Document>();
    // Ids already collected; replaces a linear scan of totalResults for every hit.
    var seenIds = new HashSet<int?>();

    // The near-real-time reader is disposable; release it when the search is done.
    using (var reader = writer1.GetReader(applyAllDeletes: true))
    {
        var searcher = new IndexSearcher(reader);

        // Runs a query (top 10) and keeps the documents whose id has not been seen yet.
        Action<Query> collect = q =>
        {
            foreach (var hit in searcher.Search(q, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                if (seenIds.Add(foundDoc.GetField("id").GetInt32Value()))
                {
                    totalResults.Add(foundDoc);
                }
            }
        };

        // Parsed query. The whole text is parsed, so parsing and searching once gives
        // the same documents as repeating it for every word.
        QueryParser parser = new QueryParser(AppLuceneVersion, "name", analyzer);
        var parsed = parser.Parse(query);
        if (array.Any(word => !String.IsNullOrEmpty(word)))
        {
            collect(parsed);
        }

        // Full title.
        var phrase = new MultiPhraseQuery();
        phrase.Add(new Term("name", query));
        collect(phrase);

        // Parts of words.
        foreach (var word in array)
        {
            if (!string.IsNullOrEmpty(word))
            {
                collect(new WildcardQuery(new Term("name", "*" + word + "*")));
            }
        }

        // Year plus part of a word: take the first word that is a year in (1800, 9999]
        // out of the word list. `number` stays 0 unless such a word exists — it used
        // to keep the last successfully parsed value even when it was out of range.
        var number = 0;
        for (var i = 0; i < array.Count; i++)
        {
            int candidate;
            if (TryParse(array[i], out candidate) && candidate > 1800 && candidate <= 9999)
            {
                number = candidate;
                array.RemoveAt(i);
                break;
            }
        }

        Console.WriteLine(number != 0);

        if (number != 0)
        {
            foreach (var word in array)
            {
                if (!string.IsNullOrEmpty(word))
                {
                    // NOTE(review): both clauses are SHOULD, so a document matching
                    // either the year or the word qualifies — confirm this is intended.
                    var booleanQuery = new BooleanQuery();
                    booleanQuery.Add(new WildcardQuery(new Term("name", "*" + word + "*")), Occur.SHOULD);
                    booleanQuery.Add(NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true), Occur.SHOULD);
                    collect(booleanQuery);
                }
            }
        }
    }

    foreach (var doc in totalResults)
    {
        results.Rows.Add(
            doc.GetField("id").GetInt32Value().ToString(),
            doc.GetValues("name")[0],
            doc.GetField("year").GetInt32Value().ToString());
    }
}
/// <summary>
/// Builds the content query for a "match anywhere" search. A single search term becomes
/// a <see cref="TermQuery"/> (when it is one punctuation character) or a
/// <c>*term*</c> <see cref="WildcardQuery"/>; several terms become a
/// <see cref="MultiPhraseQuery"/> whose first and last positions use the term matches
/// collected from the index when there are any.
/// </summary>
/// <param name="indexReader">Reader passed on to the first/last term match collection.</param>
/// <param name="expandedSearchString">Space-separated search text.</param>
/// <param name="matchCase">
/// When false, the case-insensitive content field is searched and the text is lower-cased.
/// </param>
/// <returns>The query to run against the content field.</returns>
private Query BuildMatchAnywhereQuery(IndexReader indexReader, string expandedSearchString, bool matchCase)
{
    string fieldToSearch = Constants.IndexFields.Content;
    string adjustedSearchString = expandedSearchString;
    if (!matchCase)
    {
        fieldToSearch = Constants.IndexFields.ContentCaseInsensitive;
        adjustedSearchString = expandedSearchString.ToLower();
    }

    List<string> searchTerms = adjustedSearchString
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .ToList();
    string firstTerm = searchTerms.FirstOrDefault();
    string lastTerm = searchTerms.LastOrDefault();

    if (searchTerms.Count == 1)
    {
        // A lone punctuation character is matched exactly; any other single term is
        // matched anywhere inside a token.
        bool isSinglePunctuation = firstTerm.Length == 1 && LuceneHelper.IsPunctuation(firstTerm[0]);
        if (isSinglePunctuation)
        {
            return new TermQuery(new Term(fieldToSearch, firstTerm));
        }

        return new WildcardQuery(new Term(fieldToSearch, "*" + firstTerm + "*"));
    }

    var firstTermMatches = new List<Term>();
    var lastTermMatches = new List<Term>();
    CollectFirstAndLastTermMatches(indexReader, fieldToSearch, firstTermMatches, lastTermMatches, firstTerm, lastTerm);

    bool hasFirstMatches = firstTermMatches.Count > 0;
    bool hasLastMatches = lastTermMatches.Count > 0;

    var phraseQuery = new MultiPhraseQuery();
    if (hasFirstMatches)
    {
        phraseQuery.Add(firstTermMatches.ToArray());
    }

    // The literal first/last term is skipped when collected matches stand in for it.
    int startIndex = hasFirstMatches ? 1 : 0;
    int endIndex = hasLastMatches ? searchTerms.Count - 1 : searchTerms.Count;
    for (int i = startIndex; i < endIndex; i++)
    {
        phraseQuery.Add(new Term(fieldToSearch, searchTerms[i]));
    }

    if (hasLastMatches)
    {
        phraseQuery.Add(lastTermMatches.ToArray());
    }

    return phraseQuery;
}
/// <summary>
/// Click handler: searches the <c>name</c> field for the lower-cased <c>find_text</c> in
/// four passes (each word as a term, whole text as one term, per-word wildcard, and —
/// when the text contains a year — per-word wildcard combined with that year) and
/// appends every distinct document (by its <c>id</c> field) to <c>textBox1</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button4_Click(object sender, EventArgs e)
{
    var query = find_text.ToLower();
    var array = query.Split(' ').ToList();

    var totalResults = new List<Document>();
    // Keys of the documents already collected; replaces a linear scan per hit.
    var seenIds = new HashSet<string>();

    // The reader obtained from the writer is disposable; release it when done.
    using (var reader = writer.GetReader())
    {
        var searcher = new IndexSearcher(reader);

        // Runs a query (top 10) and keeps the documents whose id has not been seen yet.
        Action<Query> collect = q =>
        {
            foreach (var hit in searcher.Search(q, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                var idField = foundDoc.GetField("id");
                var key = idField == null ? null : idField.ToString();
                if (seenIds.Add(key))
                {
                    totalResults.Add(foundDoc);
                }
            }
        };

        // Search by each single word of the title.
        foreach (var word in array)
        {
            if (!String.IsNullOrEmpty(word))
            {
                var singleWord = new MultiPhraseQuery();
                singleWord.Add(new Term("name", word));
                collect(singleWord);
            }
        }

        // Search by the whole title.
        var phrase = new MultiPhraseQuery();
        phrase.Add(new Term("name", query));
        collect(phrase);

        // Search by partial words of the title.
        foreach (var word in array)
        {
            if (!String.IsNullOrEmpty(word))
            {
                collect(new WildcardQuery(new Term("name", "*" + word + "*")));
            }
        }

        // Search by year and (part of) the title: take the first word that is a year
        // in (1800, 9999] out of the word list. `number` stays 0 unless such a word
        // exists — it used to keep the last successfully parsed value even when it
        // was out of range.
        int number = 0;
        for (int i = 0; i < array.Count; i++)
        {
            int candidate;
            if (Int32.TryParse(array[i], out candidate) && candidate > 1800 && candidate <= 9999)
            {
                number = candidate;
                array.RemoveAt(i);
                break;
            }
        }

        Console.WriteLine(number != 0);

        if (number != 0)
        {
            foreach (var word in array)
            {
                if (!String.IsNullOrEmpty(word))
                {
                    // NOTE(review): both clauses are SHOULD, so a document matching
                    // either the year or the word qualifies — confirm this is intended.
                    BooleanQuery booleanQuery = new BooleanQuery();
                    booleanQuery.Add(new WildcardQuery(new Term("name", "*" + word + "*")), Occur.SHOULD);
                    booleanQuery.Add(NumericRangeQuery.NewIntRange("year", 1, number, number, true, true), Occur.SHOULD);
                    collect(booleanQuery);
                }
            }
        }
    }

    foreach (var doc in totalResults)
    {
        textBox1.AppendText(doc.ToString());
    }
}
/// <summary>
/// Click handler: searches movies for the text in <c>TextSearch</c> and shows the
/// distinct results in <c>ResultBox</c>. Depending on <c>luceneCheck</c> the search runs
/// against PostgreSQL (at most 10 rows overall) or against the Lucene index. Both paths
/// try, in order: exact/full title, year plus part of a word, single words, and parts
/// of words.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void SearchButton_Click(object sender, EventArgs e)
{
    // Variables and UI state
    int counter = 0;
    Cursor.Current = Cursors.WaitCursor;
    SearchButton.Enabled = false;
    ResultBox.Items.Clear();

    var query = TextSearch.Text;
    var array = query.Split(' ').ToList();
    List<string> res_list = new List<string>();

    // Removes the first word that is a year in (1800, 9999] from the word list and
    // returns it; returns 0 when the query contains no such word.
    Func<int> extractYear = () =>
    {
        for (int i = 0; i < array.Count; i++)
        {
            int candidate;
            if (Int32.TryParse(array[i], out candidate) && candidate > 1800 && candidate <= 9999)
            {
                array.RemoveAt(i);
                return candidate;
            }
        }

        return 0;
    };

    try
    {
        if (!luceneCheck.Checked)
        {
            using (var conn = new NpgsqlConnection(connString))
            {
                conn.Open();

                // Executes a command and appends its rows (columns 0..2 read as id,
                // year, name) until 10 rows have been collected overall.
                Action<NpgsqlCommand> collectRows = command =>
                {
                    using (command)
                    using (var reader = command.ExecuteReader())
                    {
                        while (reader.Read() && counter < 10)
                        {
                            counter += 1;
                            int id = reader.GetInt32(0);
                            int year = reader.GetInt32(1);
                            string name = reader.GetString(2);
                            res_list.Add("ID: " + id.ToString() + " YEAR: " + year.ToString() + " NAME: " + name);
                        }
                    }
                };

                // All user text is passed as parameters rather than concatenated into
                // the SQL, so quotes in the query can neither break nor alter it.

                // Search by exact title.
                var exactTitle = new NpgsqlCommand("SELECT * FROM movies WHERE name = @name", conn);
                exactTitle.Parameters.AddWithValue("name", query);
                collectRows(exactTitle);

                // Search by year and part of the title: look for a year in the query.
                int number = extractYear();
                if (number != 0)
                {
                    foreach (var word in array)
                    {
                        if (!String.IsNullOrEmpty(word))
                        {
                            var byYear = new NpgsqlCommand(
                                "SELECT * FROM movies WHERE year = @year AND name ILIKE @pattern", conn);
                            byYear.Parameters.AddWithValue("year", number);
                            byYear.Parameters.AddWithValue("pattern", "%" + word + "%");
                            collectRows(byYear);
                        }
                    }
                }

                // Search by a whole word at the start, as the whole title, or at the end.
                foreach (var word in array)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        var byWord = new NpgsqlCommand(
                            "SELECT * FROM movies WHERE name ILIKE @prefix OR name = @exact OR name ILIKE @suffix", conn);
                        byWord.Parameters.AddWithValue("prefix", word + " %");
                        byWord.Parameters.AddWithValue("exact", word);
                        byWord.Parameters.AddWithValue("suffix", "% " + word);
                        collectRows(byWord);
                    }
                }

                // Search by part of a word, so that at least something is found.
                foreach (var word in array)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        var byPart = new NpgsqlCommand("SELECT * FROM movies WHERE name ILIKE @pattern", conn);
                        byPart.Parameters.AddWithValue("pattern", "%" + word + "%");
                        collectRows(byPart);
                    }
                }
            }
        }
        else
        {
            // Runs a query (top 10) and appends one formatted line per hit.
            Action<Query> collectHits = q =>
            {
                foreach (var hit in searcher.Search(q, 10).ScoreDocs)
                {
                    var foundDoc = searcher.Doc(hit.Doc);
                    res_list.Add("Score: " + hit.Score
                        + " ID: " + foundDoc.GetField("id").GetInt32Value().ToString()
                        + " YEAR: " + foundDoc.GetField("year").GetInt32Value().ToString()
                        + " NAME: " + foundDoc.GetValues("name")[0]);
                }
            };

            // Parsed query. The whole text is parsed, so parsing and searching once
            // yields the same (de-duplicated) lines as repeating it for every word.
            QueryParser parser = new QueryParser(AppLuceneVersion, "name", analyzer);
            var parsed = parser.Parse(query);
            if (array.Any(word => !String.IsNullOrEmpty(word)))
            {
                collectHits(parsed);
            }

            // Search by the full title.
            var phrase = new MultiPhraseQuery();
            phrase.Add(new Term("name", query));
            collectHits(phrase);

            // Search by each word as a wildcard pattern.
            foreach (var word in array)
            {
                if (!String.IsNullOrEmpty(word))
                {
                    collectHits(new WildcardQuery(new Term("name", word)));
                }
            }

            // Search by year and word: both clauses are required.
            int number = extractYear();
            if (number != 0)
            {
                foreach (var word in array)
                {
                    if (!String.IsNullOrEmpty(word))
                    {
                        BooleanQuery booleanQuery = new BooleanQuery();
                        booleanQuery.Add(new WildcardQuery(new Term("name", word)), Occur.MUST);
                        booleanQuery.Add(NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true), Occur.MUST);
                        collectHits(booleanQuery);
                    }
                }
            }
        }

        // No duplicates wanted.
        res_list = res_list.Distinct().ToList();
        ResultBox.Items.Clear();
        foreach (var item in res_list)
        {
            ResultBox.Items.Add(item);
        }

        // Nothing found at all.
        if (ResultBox.Items.Count == 0)
        {
            ResultBox.Items.Add("Нет результатов. Попробуйте расширить поисковый запрос");
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show("Error occured while searching: " + ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
    finally
    {
        // Always restore the UI, whatever happened during the search.
        Cursor.Current = Cursors.Default;
        SearchButton.Enabled = true;
    }
}
/// <summary>
/// Runs a bill search driven by the parameters of the current HTTP request.
/// </summary>
/// <remarks>
/// Request parameters read: <c>session</c>, <c>q</c>, <c>start</c>, <c>count</c>,
/// <c>chamber</c>, <c>status</c>, <c>sponsor</c>, <c>cosponsor</c> and <c>sort</c>.
/// The lookups are tried in this order: exact bill number, public law number,
/// U.S.C. reference, and finally (only when none of those produced a match) a
/// Lucene full-text query against the per-session index.
/// </remarks>
/// <param name="api">
/// <c>true</c> when called from the API: the default page size is 1000 instead of 30
/// and exact matches are returned as results instead of issuing a redirect.
/// </param>
/// <returns>
/// <c>null</c> when the response was redirected; otherwise a table with a
/// <c>"count"</c> entry and, when something matched, <c>"method"</c> and
/// <c>"results"</c> entries. With no matches only <c>"count"</c> (0) is present.
/// </returns>
public static Hashtable Search(bool api)
{
    BillType type;
    int number;
    int session = -1;
    if (HttpContext.Current.Request["session"] != null && HttpContext.Current.Request["session"] != "")
    {
        session = int.Parse(HttpContext.Current.Request["session"]);
    }

    string q = HttpContext.Current.Request["q"];

    int start = 0, count = (!api ? 30 : 1000);
    if (HttpContext.Current.Request["start"] != null)
    {
        start = int.Parse(HttpContext.Current.Request["start"]);
    }
    if (HttpContext.Current.Request["count"] != null)
    {
        count = int.Parse(HttpContext.Current.Request["count"]);
    }
    if (start < 0)
    {
        // A negative offset would otherwise be used as a hit index below and throw.
        start = 0;
    }

    BooleanQuery query = new BooleanQuery();

    Hashtable no_results = new Hashtable();
    no_results["count"] = 0;

    // Queries containing a wildcard character are rejected outright.
    if (q != null && q.IndexOf("*") > -1)
    {
        return(no_results);
    }

    // "<query>/<session>" shorthand: split off a trailing session number and
    // redirect to the canonical search URL.
    if (!api && session == -1 && q != null)
    {
        int slash = q.IndexOf('/');
        if (slash >= q.Length - 4 && slash > 2)
        {
            try
            {
                session = int.Parse(q.Substring(slash + 1));

                // and if that worked...
                q = q.Substring(0, slash);
                HttpContext.Current.Response.Redirect("billsearch.xpd?session=" + session + "&q=" + HttpUtility.UrlEncode(q));
                return(null);
            }
            catch
            {
                // Deliberate best effort: the text after the slash was not a
                // session number, so fall through to the normal search.
            }
        }
    }

    if (session == -1)
    {
        session = Util.CurrentSession;
    }

    //Console.Error.WriteLine("Find: " + q);

    string search_method = "search";

    ArrayList specs = new ArrayList();
    Hashtable scores = new Hashtable();

    // Match a bill number exactly
    if (q != null && Bills.ParseID(q, out type, out number))
    {
        if (!api)
        {
            // Redirect the user right to the bill page.
            // Don't even check if bill exists.
            HttpContext.Current.Response.Redirect(
                Bills.BillLink2(session, type, number));
            return(null);
        }
        else
        {
            search_method = "search by bill number";
            scores[session + EnumsConv.BillTypeToString(type) + number] = 1.0F;
            specs.Add(new Database.AndSpec(
                          new Database.SpecEQ("session", session),
                          new Database.SpecEQ("type", EnumsConv.BillTypeToString(type)),
                          new Database.SpecEQ("number", number)));
        }
    }

    // Match public law number exactly
    // NOTE(review): the outer condition requires !api, so the inner else branch
    // below can never run; confirm whether API callers were meant to get it.
    if (!api && q != null && (q.StartsWith("P.L.") || q.StartsWith("PL")))
    {
        try
        {
            string num = null;
            if (q.StartsWith("P.L."))
            {
                num = q.Substring(4);
            }
            if (q.StartsWith("PL"))
            {
                num = q.Substring(2);
            }
            num = num.Replace(" ", "");

            // The part before the dash is used as the session number.
            int dash = num.IndexOf('-');
            int s = int.Parse(num.Substring(0, dash));

            TableRow bill = Util.Database.DBSelectFirst("billindex", "session, type, number",
                                                        new Database.SpecEQ("idx", "publiclawnumber"),
                                                        new Database.SpecEQ("session", s),
                                                        new Database.SpecEQ("value", num));

            if (bill != null)
            {
                if (!api)
                {
                    HttpContext.Current.Response.Redirect(Bills.BillLink3((int)bill["session"], (string)bill["type"], (int)bill["number"]));
                    return(null);
                }
                else
                {
                    search_method = "search by public law number";
                    scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
                    specs.Add(new Database.AndSpec(
                                  new Database.SpecEQ("session", (int)bill["session"]),
                                  new Database.SpecEQ("type", (string)bill["type"]),
                                  new Database.SpecEQ("number", (int)bill["number"])));
                }
            }
        }
        catch
        {
            // Deliberate best effort: a malformed public law number simply
            // falls through to the remaining search strategies.
        }
    }

    if (session == -1)
    {
        session = Util.CurrentSession;
    }

    // Match USC reference
    Regex uscexp = new Regex(@"(\d[0-9A-Za-z\-]*)\s+U\.?S\.?C\.?\s+(\d[0-9A-Za-z\-]*)((\s*\([^\) ]+\))*)",
                             RegexOptions.IgnoreCase);
    Match uscmc = (q == null ? null : uscexp.Match(q));
    if (uscmc != null && uscmc.Success)
    {
        string title = uscmc.Groups[1].Value;
        string section = uscmc.Groups[2].Value;
        string paragraph = uscmc.Groups[3].Value;

        // Break the parenthesised paragraph list apart and trim the empty
        // entries at both ends before re-joining with underscores.
        string[] ps = paragraph.Split('[', '(', ')', ' ');
        int psi = 0;
        while (psi < ps.Length - 1 && ps[psi] == "")
        {
            psi++;
        }
        int pse = ps.Length - 1;
        while (pse > 0 && ps[pse] == "")
        {
            pse--;
        }
        if (ps.Length != 0)
        {
            paragraph = "_" + String.Join("_", ps, psi, pse - psi + 1);
        }

        Table table = Util.Database.DBSelect("billusc", "session, type, number",
                                             new Database.SpecEQ("session", session),
                                             new Database.OrSpec(
                                                 new Database.SpecEQ("ref", title + "_" + section + paragraph),
                                                 new Database.SpecStartsWith("ref", title + "_" + section + paragraph + "_")));
        foreach (TableRow bill in table)
        {
            search_method = "search by U.S.C. section";
            scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
            specs.Add(new Database.AndSpec(
                          new Database.SpecEQ("session", (int)bill["session"]),
                          new Database.SpecEQ("type", (string)bill["type"]),
                          new Database.SpecEQ("number", (int)bill["number"])));
        }
    }

    int total_count = -1;

    // Nothing matched exactly: fall back to the Lucene index.
    if (specs.Count == 0)
    {
        if (q != null && q.Trim() != "")
        {
            BooleanQuery query1 = new BooleanQuery();
            query.Add(query1, BooleanClause.Occur.MUST);
            try
            {
                /*if (!q.StartsWith("-")) {
                 *  PhraseQuery pq = new PhraseQuery();
                 *  pq.Add( new Term("shorttitles", q) );
                 *  pq.SetBoost((float)4);
                 *  query1.Add(pq, false, false);
                 * }*/

                Query query_titles2 = new QueryParser("shorttitles", new StandardAnalyzer()).Parse(q);
                query_titles2.SetBoost((float)3);
                query1.Add(query_titles2, BooleanClause.Occur.SHOULD);

                Query query_titles1 = new QueryParser("officialtitles", new StandardAnalyzer()).Parse(q);
                query_titles1.SetBoost((float)2);
                query1.Add(query_titles1, BooleanClause.Occur.SHOULD);

                Query query_summary = new QueryParser("summary", new StandardAnalyzer()).Parse(q);
                query1.Add(query_summary, BooleanClause.Occur.SHOULD);

                Query query_text = new QueryParser("fulltext", new StandardAnalyzer()).Parse(q);
                query1.Add(query_text, BooleanClause.Occur.SHOULD);
            }
            catch (Exception)
            {
                // Text that cannot be turned into a query yields an empty result.
                return(no_results);
            }
        }

        string chamber = HttpContext.Current.Request["chamber"];
        string[] status = HttpContext.Current.Request["status"] == null ? null : HttpContext.Current.Request["status"].Split(',');
        string sponsor = HttpContext.Current.Request["sponsor"];
        string cosponsor = HttpContext.Current.Request["cosponsor"];

        if (chamber != null && (chamber == "s" || chamber == "h"))
        {
            query.Add(new WildcardQuery(new Term("type", chamber + "*")), BooleanClause.Occur.MUST);
        }
        if (status != null && status[0] != "")
        {
            // All requested states are added as alternatives at one position.
            List <Term> terms = new List <Term>();
            foreach (string s in status)
            {
                terms.Add(new Term("state", s));
            }
            MultiPhraseQuery mpq = new MultiPhraseQuery();
            mpq.Add(terms.ToArray());
            query.Add(mpq, BooleanClause.Occur.MUST);
        }
        if (sponsor != null && sponsor != "")
        {
            query.Add(new TermQuery(new Term("sponsor", sponsor)), BooleanClause.Occur.MUST);
        }
        if (cosponsor != null && cosponsor != "")
        {
            query.Add(new TermQuery(new Term("cosponsor", cosponsor)), BooleanClause.Occur.MUST);
        }

        IndexSearcher searcher = new IndexSearcher(Util.DataPath + Path.DirectorySeparatorChar + session + Path.DirectorySeparatorChar + "index.bills.lucene");
        try
        {
            Sort sort = null;
            if (HttpContext.Current.Request["sort"] != null && HttpContext.Current.Request["sort"] == "introduced")
            {
                sort = new Sort(new SortField("introduced", SortField.STRING, true));
            }
            if (HttpContext.Current.Request["sort"] != null && HttpContext.Current.Request["sort"] == "lastaction")
            {
                sort = new Sort(new SortField("lastaction", SortField.STRING, true));
            }

            Hits hits = searcher.Search(query, sort == null ? new Sort() : sort);

            int end = hits.Length();
            if (start + count < end)
            {
                end = start + count;
            }
            total_count = hits.Length();

            for (int i = start; i < end; i++)
            {
                Document doc = hits.Doc(i);
                string billsession = doc.Get("session");
                string billtype = doc.Get("type");
                string billnumber = doc.Get("number");
                int istatus = (int)EnumsConv.BillStatusFromString(doc.Get("status"));

                float score;
                if (sort == null)
                {
                    // readjust the score based on status
                    score = hits.Score(i) + istatus / (float)8 * (float).2;
                }
                else
                {
                    // keep order from Lucene
                    score = -i;
                }

                scores[billsession + billtype + billnumber] = score;
                specs.Add(new Database.AndSpec(
                              new Database.SpecEQ("session", billsession),
                              new Database.SpecEQ("type", billtype),
                              new Database.SpecEQ("number", billnumber)));
            }
        }
        finally
        {
            // The searcher opens the on-disk index; release it once every hit
            // has been read so handles are not leaked on each request.
            searcher.Close();
        }

        // sort=hits: replace the Lucene scores with the stored hit counters.
        if (HttpContext.Current.Request["sort"] != null && HttpContext.Current.Request["sort"] == "hits" && specs.Count > 0)
        {
            Table hitsinfo = Util.Database.DBSelect("billhits", "*", Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));
            foreach (TableRow billhits in hitsinfo)
            {
                scores["" + billhits["session"] + billhits["type"] + billhits["number"]] = (float)(int)billhits["hits1"];
            }
        }
    }

    if (specs.Count == 0)
    {
        return(no_results);
    }

    Table billinfo = Util.Database.DBSelect("billstatus", "*", Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));

    // Exact-match strategies never ran the Lucene query, so count the rows.
    if (total_count == -1)
    {
        total_count = billinfo.Rows;
    }

    ArrayList ret = new ArrayList();
    foreach (TableRow r in billinfo)
    {
        ret.Add(r);
    }

    BillHitComparer bhc = new BillHitComparer();
    bhc.scores = scores;
    ret.Sort(bhc);

    Hashtable ret2 = new Hashtable();
    ret2["count"] = total_count;
    ret2["method"] = search_method;
    ret2["results"] = ret;

    return(ret2);
}
/// <summary>
/// Highlights a <see cref="MultiPhraseQuery"/> whose two term arrays are added
/// at explicit positions 2 and 0, leaving a gap between them, and expects two
/// highlights when a single fragment is requested.
/// </summary>
public void TestQueryScorerMultiPhraseQueryHighlightingWithGap()
{
    const int fragmentLimit = 1;
    const int highlightTotal = 2;

    // The toString of MultiPhraseQuery doesn't work so well with these
    // out-of-order additions, but the Query itself seems to match accurately.
    var gappedQuery = new MultiPhraseQuery();
    gappedQuery.Add(new[] { new Term(FIELD_NAME, "wordz") }, 2);
    gappedQuery.Add(new[] { new Term(FIELD_NAME, "wordx") }, 0);

    DoSearching(gappedQuery);

    AssertExpectedHighlightCount(fragmentLimit, highlightTotal);
}
/// <summary>
/// Demo: indexes three JSON records into an in-memory Lucene index (one
/// <c>app_data</c> field per array entry, the raw JSON stored as <c>payload</c>)
/// and prints the payload of every record whose <c>app_data</c> contains
/// "12355" or "89700".
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var jsonProps = "[{\"name\":\"Ondrej\",\"surname\":\"Kubicek\",\"app_data\":[\"112233\", \"56612\"]}," +
                    "{\"name\":\"Lukas\",\"surname\":\"Bily\",\"app_data\":[\"12355\", \"112233\", \"89466\"]}," +
                    "{\"name\":\"Lenak\",\"surname\":\"Nejaka\",\"app_data\":[\"89700\"]}]";

    var version = LuceneVersion.LUCENE_48;

    // Directory, analyzer, writer, reader and the parsed JSON document are all
    // disposable; scope them so they are released even if indexing fails.
    using (var dir = new RAMDirectory())
    using (var analyzer = new StandardAnalyzer(version))
    using (var writer = new IndexWriter(dir, new IndexWriterConfig(version, analyzer)))
    {
        using (var d = JsonDocument.Parse(jsonProps))
        {
            foreach (var line in d.RootElement.EnumerateArray())
            {
                var doc = new Document();
                doc.Add(new StringField("name", line.GetProperty("name").GetString(), Field.Store.NO));
                doc.Add(new StringField("surname", line.GetProperty("surname").GetString(), Field.Store.NO));

                // One untokenized field instance per array entry makes the
                // field multi-valued, so each value is matchable on its own.
                foreach (var f in line.GetProperty("app_data").EnumerateArray())
                {
                    doc.Add(new StringField("app_data", f.GetString(), Field.Store.NO));
                }

                // Only the payload is stored; it is what gets printed for a hit.
                doc.Add(new StringField("payload", line.ToString(), Field.Store.YES));
                writer.AddDocument(doc);
            }
        }

        writer.Flush(false, false);

        // Near-real-time reader taken from the writer, so no commit is needed.
        using (var reader = writer.GetReader(true))
        {
            var searcher = new IndexSearcher(reader);

            var booleanQuery = new BooleanQuery();
            booleanQuery.Add(new TermQuery(new Term("app_data", "12355")), Occur.SHOULD);
            booleanQuery.Add(new TermQuery(new Term("app_data", "89700")), Occur.SHOULD);

            var res = searcher.Search(booleanQuery, 100);
            Console.WriteLine(res.TotalHits);
            foreach (var hit in res.ScoreDocs)
            {
                var item = searcher.Doc(hit.Doc);
                Console.WriteLine(item.Get("payload"));
            }
        }
    }
}
/// <summary>
/// Creates a query from the analysis chain.
/// <para/>
/// Expert: this is more useful for subclasses such as queryparsers.
/// If using this class directly, just use <see cref="CreateBooleanQuery(string, string)"/>
/// and <see cref="CreatePhraseQuery(string, string)"/>.
/// <para/>
/// The text is analyzed once to count tokens and positions, the cached token
/// stream is rewound, and a second pass builds the query. The query shape
/// depends on the token count, on whether several tokens share a position,
/// and on <paramref name="quoted"/>.
/// </summary>
/// <param name="analyzer"> Analyzer used for this query. </param>
/// <param name="operator"> Default boolean operator used for this query. </param>
/// <param name="field"> Field to create queries against. </param>
/// <param name="queryText"> Text to be passed to the analysis chain. </param>
/// <param name="quoted"> <c>true</c> if phrases should be generated when terms occur at more than one position. </param>
/// <param name="phraseSlop"> Slop factor for phrase/multiphrase queries. </param>
/// <returns>
/// <c>null</c> when analysis produced no tokens; otherwise the query built via
/// <c>NewTermQuery</c>, <c>NewBooleanQuery</c>, <c>NewMultiPhraseQuery</c> or
/// <c>NewPhraseQuery</c>.
/// </returns>
/// <exception cref="Exception"> An I/O error occurred while setting up or running the analysis. </exception>
protected Query CreateFieldQuery(Analyzer analyzer, Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
{
    Debug.Assert(@operator == Occur.SHOULD || @operator == Occur.MUST);
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    CachingTokenFilter buffer = null;
    ITermToBytesRefAttribute termAtt = null;
    IPositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;
    int positionCount = 0;
    bool severalTokensAtSamePosition = false;
    bool hasMoreTokens = false;

    // First pass: run the analyzer and count tokens and positions.
    TokenStream source = null;
    try
    {
        source = analyzer.GetTokenStream(field, new StringReader(queryText));
        source.Reset();
        buffer = new CachingTokenFilter(source);
        buffer.Reset();

        // Both attributes are optional; later code checks them for null.
        if (buffer.HasAttribute <ITermToBytesRefAttribute>())
        {
            termAtt = buffer.GetAttribute <ITermToBytesRefAttribute>();
        }
        if (buffer.HasAttribute <IPositionIncrementAttribute>())
        {
            posIncrAtt = buffer.GetAttribute <IPositionIncrementAttribute>();
        }

        if (termAtt != null)
        {
            try
            {
                hasMoreTokens = buffer.IncrementToken();
                while (hasMoreTokens)
                {
                    numTokens++;
                    // Without a position attribute every token advances by one.
                    int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                    if (positionIncrement != 0)
                    {
                        positionCount += positionIncrement;
                    }
                    else
                    {
                        // Increment 0 means this token shares the previous token's position.
                        severalTokensAtSamePosition = true;
                    }
                    hasMoreTokens = buffer.IncrementToken();
                }
            }
            catch (System.IO.IOException)
            {
                // ignore
            }
        }
    }
    catch (System.IO.IOException e)
    {
        throw new Exception("Error analyzing query text", e);
    }
    finally
    {
        IOUtils.DisposeWhileHandlingException(source);
    }

    // rewind the buffer stream
    buffer.Reset();

    // NOTE(review): presumably FillBytesRef refills this same BytesRef for each
    // token, which would explain the DeepCopyOf on every Term below. Confirm.
    BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;

    if (numTokens == 0)
    {
        return(null);
    }
    else if (numTokens == 1)
    {
        // Exactly one token: a single term query.
        try
        {
            bool hasNext = buffer.IncrementToken();
            Debug.Assert(hasNext == true);
            termAtt.FillBytesRef();
        }
        catch (System.IO.IOException)
        {
            // safe to ignore, because we know the number of tokens
        }
        return(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))));
    }
    else
    {
        if (severalTokensAtSamePosition || (!quoted))
        {
            if (positionCount == 1 || (!quoted))
            {
                // no phrase query:

                if (positionCount == 1)
                {
                    // simple case: only one position, with synonyms
                    BooleanQuery q = NewBooleanQuery(true);
                    for (int i = 0; i < numTokens; i++)
                    {
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }
                        Query currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        q.Add(currentQuery, Occur.SHOULD);
                    }
                    return(q);
                }
                else
                {
                    // multiple positions
                    // One clause per position, combined with @operator; tokens
                    // sharing a position are grouped in a nested SHOULD query.
                    BooleanQuery q = NewBooleanQuery(false);
                    Query currentQuery = null;
                    for (int i = 0; i < numTokens; i++)
                    {
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }
                        if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                        {
                            // Same position as the previous token: promote the pending
                            // clause to a BooleanQuery on first use, then add this term.
                            if (!(currentQuery is BooleanQuery))
                            {
                                Query t = currentQuery;
                                currentQuery = NewBooleanQuery(true);
                                ((BooleanQuery)currentQuery).Add(t, Occur.SHOULD);
                            }
                            ((BooleanQuery)currentQuery).Add(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))), Occur.SHOULD);
                        }
                        else
                        {
                            // New position: flush the pending clause and start a new one.
                            if (currentQuery != null)
                            {
                                q.Add(currentQuery, @operator);
                            }
                            currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                    }
                    // Flush the clause for the last position.
                    q.Add(currentQuery, @operator);
                    return(q);
                }
            }
            else
            {
                // phrase query:
                // Quoted text with stacked tokens: each position contributes an
                // array of alternative terms.
                MultiPhraseQuery mpq = NewMultiPhraseQuery();
                mpq.Slop = phraseSlop;
                IList <Term> multiTerms = new List <Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++)
                {
                    int positionIncrement = 1;
                    try
                    {
                        bool hasNext = buffer.IncrementToken();
                        Debug.Assert(hasNext == true);
                        termAtt.FillBytesRef();
                        if (posIncrAtt != null)
                        {
                            positionIncrement = posIncrAtt.PositionIncrement;
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // safe to ignore, because we know the number of tokens
                    }

                    // Moving to a new position: emit the terms collected for the previous one.
                    if (positionIncrement > 0 && multiTerms.Count > 0)
                    {
                        if (enablePositionIncrements)
                        {
                            mpq.Add(multiTerms.ToArray(), position);
                        }
                        else
                        {
                            mpq.Add(multiTerms.ToArray());
                        }
                        multiTerms.Clear();
                    }
                    position += positionIncrement;
                    multiTerms.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                }
                // Emit the terms of the final position.
                if (enablePositionIncrements)
                {
                    mpq.Add(multiTerms.ToArray(), position);
                }
                else
                {
                    mpq.Add(multiTerms.ToArray());
                }
                return(mpq);
            }
        }
        else
        {
            // Quoted text with one token per position: a plain phrase query.
            PhraseQuery pq = NewPhraseQuery();
            pq.Slop = phraseSlop;
            int position = -1;

            for (int i = 0; i < numTokens; i++)
            {
                int positionIncrement = 1;

                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                    termAtt.FillBytesRef();
                    if (posIncrAtt != null)
                    {
                        positionIncrement = posIncrAtt.PositionIncrement;
                    }
                }
                catch (System.IO.IOException)
                {
                    // safe to ignore, because we know the number of tokens
                }

                // Explicit positions are passed only when enablePositionIncrements is set.
                if (enablePositionIncrements)
                {
                    position += positionIncrement;
                    pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)), position);
                }
                else
                {
                    pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                }
            }
            return(pq);
        }
    }
}
/// <summary>
/// On base of https://lucenenet.apache.org
/// <para/>
/// Searches the index in four passes (each query word as an exact
/// <c>name_word</c> term, the whole query as a <c>full_name</c> term, each word
/// as a <c>*word*</c> wildcard, and finally word wildcard combined with a year)
/// and merges the hits in that order, keeping the first occurrence of each
/// document id.
/// </summary>
/// <param name="query">Space-separated search text.</param>
/// <returns>Up to ten (full name, year) pairs, in the order the passes found them.</returns>
public IEnumerable<(string name, int year)> SearchWithLucy(string query)
{
    var words = query.Split(' ').ToList();
    var totalResults = new List<Document>();
    var seenIds = new HashSet<int?>();

    // GetReader opens a new near-real-time reader on every call; dispose it
    // once all stored fields have been loaded so readers do not pile up.
    using (var reader = lucyAdapter.lucyWriter.GetReader(applyAllDeletes: true))
    {
        var searcher = new IndexSearcher(reader);

        // Runs one query (top 10) and appends every document not seen before.
        void Collect(Query luceneQuery)
        {
            foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
            {
                var document = searcher.Doc(hit.Doc);
                if (seenIds.Add(document.GetField("id").GetInt32Value()))
                {
                    totalResults.Add(document);
                }
            }
        }

        // word
        foreach (var word in words)
        {
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            var wordQuery = new MultiPhraseQuery();
            wordQuery.Add(new Term("name_word", word));
            Collect(wordQuery);
        }

        // full name
        var fullNameQuery = new MultiPhraseQuery();
        fullNameQuery.Add(new Term("full_name", query));
        Collect(fullNameQuery);

        // word parts
        foreach (var word in words)
        {
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            Collect(new WildcardQuery(new Term("name_word", "*" + word + "*")));
        }

        // year and word part: take the first word that parses as an integer
        // out of the word list and use it as the year.
        var number = 0;
        for (var i = 0; i < words.Count; i++)
        {
            if (int.TryParse(words[i], out number))
            {
                words.RemoveAt(i);
                break;
            }
        }

        if (number != 0)
        {
            foreach (var word in words)
            {
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                // NOTE(review): with SHOULD on both clauses a document matching
                // either the word or the year qualifies; confirm MUST was not intended.
                var booleanQuery = new BooleanQuery();
                booleanQuery.Add(new WildcardQuery(new Term("name_word", "*" + word + "*")), Occur.SHOULD);
                booleanQuery.Add(NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true), Occur.SHOULD);
                Collect(booleanQuery);
            }
        }
    }

    foreach (var doc in totalResults.Take(10))
    {
        yield return(doc.GetValues("full_name")[0], (int)doc.GetField("year").GetInt32Value());
    }
}
/// <summary>
/// Executes a search against the index under the reader lock and returns the
/// inflated hits together with the requested facet counts.
/// </summary>
/// <param name="queryDefinition">
/// Term queries, hit limit, facet fields and facet maximum. With no term
/// queries every document matches.
/// </param>
/// <param name="cancellationToken">Cancels waiting for the reader lock.</param>
/// <returns>The total hit count, the inflated hits and one facet group per requested facet.</returns>
public async Task<SearchResult<T>> SearchAsync(SearchQuery queryDefinition, CancellationToken cancellationToken = default)
{
    using (await writerLock.ReaderLockAsync(cancellationToken))
    {
        var result = new SearchResult<T>();
        List<T> hits = new List<T>();

        using (var writer = getWriter())
        {
            Query query = new MatchAllDocsQuery();

            // Term queries
            if (queryDefinition.TermQueries.Any())
            {
                var phraseQuery = new MultiPhraseQuery();
                foreach (var termQuery in queryDefinition.TermQueries)
                {
                    // Each term query contributes one position whose alternatives
                    // are the space-separated words of its value. Lower-casing is
                    // culture-invariant so the terms do not depend on the
                    // server's current culture.
                    // NOTE(review): a value with no words yields an empty Term
                    // array; confirm MultiPhraseQuery.Add tolerates that.
                    phraseQuery.Add(
                        termQuery.value
                        .Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries)
                        .Select(phrase => new Term(termQuery.field, phrase.ToLowerInvariant()))
                        .ToArray()
                        );
                }
                query = phraseQuery;
            }

            // The near-real-time reader is opened per search and must be
            // disposed; it stays open until the facet counting below is done.
            using (var reader = writer.DocsWriter.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);
                var luceneResult = searcher.Search(query, queryDefinition.Limit);

                foreach (var doc in luceneResult.ScoreDocs)
                {
                    var foundDoc = searcher.Doc(doc.Doc);
                    hits.Add(await inflateDocument(foundDoc));
                }

                result.TotalHits = luceneResult.TotalHits;
                result.Hits = hits;

                // Facets
                if (queryDefinition.Facets.Any())
                {
                    FacetsConfig facetsConfig = new FacetsConfig();
                    FacetsCollector fc = new FacetsCollector();

                    FacetsCollector.Search(searcher, query, queryDefinition.FacetMax, fc);

                    using (var taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.Open(Path.Combine(options.IndexPath, indexType, "taxonomy"))))
                    {
                        var facets = new FastTaxonomyFacetCounts(taxonomyReader, facetsConfig, fc);
                        foreach (var facet in queryDefinition.Facets)
                        {
                            var facetGroup = new FacetGroup { Field = facet };
                            facetGroup.Facets = facets.GetTopChildren(queryDefinition.FacetMax, facet).LabelValues
                                                .Select(x => new Facet { Key = x.Label, Count = (long)x.Value })
                                                .ToArray();
                            result.FacetGroups.Add(facetGroup);
                        }
                    }
                }
            }
        }

        return(result);
    }
}
/// <summary>
/// Highlights a <see cref="MultiPhraseQuery"/> made of the alternatives
/// "wordx"/"wordb" followed by "wordy" and expects six highlights when two
/// fragments are requested.
/// </summary>
public void TestQueryScorerMultiPhraseQueryHighlighting()
{
    const int fragmentLimit = 2;
    const int highlightTotal = 6;

    // First position: either "wordx" or "wordb".
    var firstPosition = new[]
    {
        new Term(FIELD_NAME, "wordx"),
        new Term(FIELD_NAME, "wordb")
    };

    var phraseWithAlternatives = new MultiPhraseQuery();
    phraseWithAlternatives.Add(firstPosition);
    // Second position: "wordy" only.
    phraseWithAlternatives.Add(new Term(FIELD_NAME, "wordy"));

    DoSearching(phraseWithAlternatives);

    AssertExpectedHighlightCount(fragmentLimit, highlightTotal);
}
/// <summary>
/// Click handler of the Lucene search button: clears the result grid and fills
/// it with the hits for the text in the search field.
/// </summary>
/// <remarks>
/// Four passes are run in order: each word as an exact <c>name_word</c> term,
/// the whole text as a <c>full_name</c> term, each word as a <c>*word*</c>
/// wildcard, and (when the text contains a year between 1801 and 9999) each
/// remaining word combined with that year. A document already shown by an
/// earlier pass is not added again.
/// </remarks>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void lucene_serach_Click(object sender, EventArgs e)
{
    this.results.Rows.Clear();

    var query = this.search_field.Text.ToLower();
    var array = query.Split(' ').ToList();
    var shownIds = new HashSet<int?>();

    // GetReader opens a new near-real-time reader on every click; dispose it
    // when the search is done so readers do not accumulate.
    using (var reader = writer1.GetReader(applyAllDeletes: true))
    {
        var searcher = new IndexSearcher(reader);

        // Runs one query (top 10) and adds a grid row per document not shown yet.
        void ShowHits(Query luceneQuery)
        {
            foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                var id = foundDoc.GetField("id").GetInt32Value();
                if (!shownIds.Add(id))
                {
                    continue;
                }

                this.results.Rows.Add(id.ToString(),
                                      foundDoc.GetValues("full_name")[0],
                                      foundDoc.GetField("year").GetInt32Value().ToString());
            }
        }

        // single word
        foreach (var word in array)
        {
            if (!String.IsNullOrEmpty(word))
            {
                var phrase = new MultiPhraseQuery();
                phrase.Add(new Term("name_word", word));
                ShowHits(phrase);
            }
        }

        // full title
        var fullName = new MultiPhraseQuery();
        fullName.Add(new Term("full_name", query));
        ShowHits(fullName);

        // word parts
        foreach (var word in array)
        {
            if (!String.IsNullOrEmpty(word))
            {
                ShowHits(new WildcardQuery(new Term("name_word", "*" + word + "*")));
            }
        }

        // year and word part: the first word that is a plausible year is taken
        // out of the word list. Integers outside the range are ignored, so a
        // word like "2" no longer triggers a year search.
        int number = 0;
        for (int i = 0; i < array.Count; i++)
        {
            if (Int32.TryParse(array[i], out int candidate) && candidate > 1800 && candidate <= 9999)
            {
                number = candidate;
                array.RemoveAt(i);
                break;
            }
        }

        if (number != 0)
        {
            foreach (var word in array)
            {
                if (!String.IsNullOrEmpty(word))
                {
                    // NOTE(review): with SHOULD on both clauses a document matching
                    // either the word or the year qualifies; confirm MUST was not intended.
                    BooleanQuery booleanQuery = new BooleanQuery();
                    booleanQuery.Add(new WildcardQuery(new Term("name_word", "*" + word + "*")), Occur.SHOULD);
                    booleanQuery.Add(NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true), Occur.SHOULD);
                    ShowHits(booleanQuery);
                }
            }
        }
    }
}