/// <summary>
/// Builds a <see cref="MultiPhraseQuery"/> from a <see cref="MultiPhraseQueryNode"/>.
/// </summary>
/// <remarks>
/// Every child is cast to <see cref="FieldQueryNode"/> and the <see cref="TermQuery"/> stored under
/// <see cref="QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID"/> is read from it. Terms are grouped by the
/// child's position increment and each group is added to the phrase at that position, in the
/// enumeration order of the sorted dictionary.
/// </remarks>
/// <param name="queryNode">Node to convert; must be a <see cref="MultiPhraseQueryNode"/>.</param>
/// <returns>The resulting phrase query (empty when the node has no children list).</returns>
public virtual Query Build(IQueryNode queryNode)
{
    var multiPhraseNode = (MultiPhraseQueryNode)queryNode;
    var result = new MultiPhraseQuery();

    IList<IQueryNode> childNodes = multiPhraseNode.GetChildren();
    if (childNodes == null)
    {
        return result;
    }

    IDictionary<int, JCG.List<Term>> termsByPosition = new JCG.SortedDictionary<int, JCG.List<Term>>();

    foreach (IQueryNode childNode in childNodes)
    {
        var fieldNode = (FieldQueryNode)childNode;
        var taggedQuery = (TermQuery)fieldNode.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);

        JCG.List<Term> bucket;
        bool found = termsByPosition.TryGetValue(fieldNode.PositionIncrement, out bucket);
        if (!found || bucket is null)
        {
            // First term seen at this position: start a new group.
            bucket = new JCG.List<Term>();
            termsByPosition[fieldNode.PositionIncrement] = bucket;
        }

        bucket.Add(taggedQuery.Term);
    }

    foreach (KeyValuePair<int, JCG.List<Term>> entry in termsByPosition)
    {
        result.Add(entry.Value.ToArray(), entry.Key);
    }

    return result;
}
/// <summary>
/// Creates the weight for <paramref name="outerInstance"/>, collecting term statistics for every
/// term of every term array so the similarity can compute the query weight.
/// </summary>
/// <param name="outerInstance">The query this weight belongs to.</param>
/// <param name="searcher">Searcher supplying the similarity, reader context and statistics.</param>
public MultiPhraseWeight(MultiPhraseQuery outerInstance, IndexSearcher searcher)
{
    this.outerInstance = outerInstance;
    this.similarity = searcher.Similarity;

    IndexReaderContext topContext = searcher.TopReaderContext;
    var collectedStats = new JCG.List<TermStatistics>();

    foreach (Term[] alternatives in outerInstance.termArrays)
    {
        for (int i = 0; i < alternatives.Length; i++)
        {
            Term current = alternatives[i];

            TermContext state;
            bool cached = termContexts.TryGetValue(current, out state);
            if (!cached || state is null)
            {
                // Build the context once per distinct term and remember it.
                state = TermContext.Build(topContext, current);
                termContexts[current] = state;
            }

            collectedStats.Add(searcher.TermStatistics(current, state));
        }
    }

    stats = similarity.ComputeWeight(
        outerInstance.Boost,
        searcher.CollectionStatistics(outerInstance.field),
        collectedStats.ToArray());
}
/// <summary>
/// Searches the "name_word" field once for every space-separated word of <paramref name="query"/>
/// and returns the formatted hits (at most 10 per word), in the order they were found.
/// </summary>
/// <param name="query">Raw user query; a null or empty value yields an empty result.</param>
/// <param name="searcher">Searcher used to run the per-word queries and load the documents.</param>
/// <returns>The formatted results; never null.</returns>
private static List<string> searchOneWord_Lucene(string query, IndexSearcher searcher)
{
    List<string> result_lucene = new List<string>();
    if (String.IsNullOrEmpty(query))
    {
        return result_lucene;
    }

    // Search one word at a time; empty entries produced by repeated spaces are dropped up front.
    string[] words = query.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var word in words)
    {
        var phrase = new MultiPhraseQuery();
        phrase.Add(new Term("name_word", word));

        foreach (var hit in searcher.Search(phrase, 10).ScoreDocs)
        {
            var foundDoc = searcher.Doc(hit.Doc);
            result_lucene.Add(getFoundDocResult(foundDoc));
        }
    }

    return result_lucene;
}
/// <summary>
/// Verifies that parsing a quoted string through an analyzer that emits canned tokens yields the
/// expected <see cref="MultiPhraseQuery"/>.
/// </summary>
public virtual void TestMultiPhraseQueryParsing()
{
    // Tokens the canned analyzer will emit regardless of the query text.
    TokenAndPos[] cannedTokens =
    {
        new TokenAndPos("a", 0),
        new TokenAndPos("1", 0),
        new TokenAndPos("b", 1),
        new TokenAndPos("1", 1),
        new TokenAndPos("c", 2)
    };

    var parser = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(cannedTokens));
    Query parsed = parser.Parse("\"this text is acually ignored\"");
    assertTrue("wrong query type!", parsed is MultiPhraseQuery);

    Term[] firstGroup = { new Term("field", "a"), new Term("field", "1") };
    Term[] secondGroup = { new Term("field", "b"), new Term("field", "1") };
    Term[] thirdGroup = { new Term("field", "c") };

    var expected = new MultiPhraseQuery();
    expected.Add(firstGroup, -1);
    expected.Add(secondGroup, 0);
    expected.Add(thirdGroup, 1);

    assertEquals(expected, parsed);
}
/// <summary>
/// Handles the search button: runs the movie search against PostgreSQL or the Lucene index
/// (depending on <c>luceneCheck</c>) and shows the distinct results in <c>ResultBox</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void SearchButton_Click(object sender, EventArgs e)
{
    Cursor.Current = Cursors.WaitCursor;
    SearchButton.Enabled = false;
    ResultBox.Items.Clear();

    var query = TextSearch.Text;
    var array = query.Split(' ').ToList();
    List<string> res_list = new List<string>();

    try
    {
        if (!luceneCheck.Checked)
        {
            SearchMoviesInDatabase(query, array, res_list);
        }
        else
        {
            SearchMoviesInLucene(query, array, res_list);
        }

        // We do not want duplicates.
        res_list = res_list.Distinct().ToList();
        ResultBox.Items.Clear();
        foreach (var item in res_list)
        {
            ResultBox.Items.Add(item);
        }

        // Nothing found at all: tell the user.
        if (ResultBox.Items.Count == 0)
        {
            ResultBox.Items.Add("Нет результатов. Попробуйте расширить поисковый запрос");
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show("Error occured while searching: " + ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
    finally
    {
        // Always restore the UI, whatever happened above.
        Cursor.Current = Cursors.Default;
        SearchButton.Enabled = true;
    }
}

/// <summary>
/// Runs the SQL searches (exact title, year + word, whole word, partial word) and appends at most
/// 10 formatted rows in total to <paramref name="results"/>. All user text is passed as parameters.
/// </summary>
/// <param name="query">The full text typed by the user.</param>
/// <param name="words">The query split on spaces; a word recognised as a year is removed from it.</param>
/// <param name="results">List receiving the formatted rows.</param>
private void SearchMoviesInDatabase(string query, List<string> words, List<string> results)
{
    int counter = 0;

    using (var conn = new NpgsqlConnection(connString))
    {
        conn.Open();

        // Search by exact title.
        using (var command = new NpgsqlCommand("SELECT * FROM movies WHERE name = @name", conn))
        {
            command.Parameters.AddWithValue("name", query);
            AppendMovieRows(command, results, ref counter);
        }

        // Search by year and word of the title, when the query contains a year.
        int year;
        if (TryExtractYear(words, out year))
        {
            foreach (var word in words)
            {
                if (String.IsNullOrEmpty(word))
                {
                    continue;
                }

                using (var command = new NpgsqlCommand(
                    "SELECT * FROM movies WHERE year = @year AND name ILIKE @pattern", conn))
                {
                    command.Parameters.AddWithValue("year", year);
                    command.Parameters.AddWithValue("pattern", "%" + word + "%");
                    AppendMovieRows(command, results, ref counter);
                }
            }
        }

        // Search by whole word in the title (first word, whole title, or last word).
        foreach (var word in words)
        {
            if (String.IsNullOrEmpty(word))
            {
                continue;
            }

            using (var command = new NpgsqlCommand(
                "SELECT * FROM movies WHERE name ILIKE @starts OR name = @exact OR name ILIKE @ends", conn))
            {
                command.Parameters.AddWithValue("starts", word + " %");
                command.Parameters.AddWithValue("exact", word);
                command.Parameters.AddWithValue("ends", "% " + word);
                AppendMovieRows(command, results, ref counter);
            }
        }

        // Search by part of a word in the title, so that at least something is found.
        foreach (var word in words)
        {
            if (String.IsNullOrEmpty(word))
            {
                continue;
            }

            using (var command = new NpgsqlCommand("SELECT * FROM movies WHERE name ILIKE @pattern", conn))
            {
                command.Parameters.AddWithValue("pattern", "%" + word + "%");
                AppendMovieRows(command, results, ref counter);
            }
        }
    }
}

/// <summary>
/// Executes <paramref name="command"/> and appends its rows (columns 0, 1, 2 read as id, year, name)
/// to <paramref name="results"/> until the shared <paramref name="counter"/> reaches 10.
/// </summary>
private static void AppendMovieRows(NpgsqlCommand command, List<string> results, ref int counter)
{
    using (var reader = command.ExecuteReader())
    {
        while (reader.Read() && counter < 10)
        {
            counter += 1;
            int id = reader.GetInt32(0);
            int year = reader.GetInt32(1);
            string name = reader.GetString(2);
            results.Add("ID: " + id.ToString() + " YEAR: " + year.ToString() + " NAME: " + name);
        }
    }
}

/// <summary>
/// Finds the first word that parses as an integer in the range 1801..9999, removes it from
/// <paramref name="words"/> and returns it as <paramref name="year"/>.
/// </summary>
/// <returns><c>true</c> when a year was found and removed; otherwise <c>false</c> (year is 0).</returns>
private static bool TryExtractYear(List<string> words, out int year)
{
    for (int i = 0; i < words.Count; i++)
    {
        int candidate;
        if (Int32.TryParse(words[i], out candidate) && candidate > 1800 && candidate <= 9999)
        {
            year = candidate;
            words.RemoveAt(i);
            return true;
        }
    }

    year = 0;
    return false;
}

/// <summary>
/// Runs the Lucene searches (parsed query, full title, per-word wildcard, year + word) and appends
/// the formatted hits to <paramref name="results"/>.
/// </summary>
/// <param name="query">The full text typed by the user.</param>
/// <param name="words">The query split on spaces; a word recognised as a year is removed from it.</param>
/// <param name="results">List receiving the formatted hits.</param>
private void SearchMoviesInLucene(string query, List<string> words, List<string> results)
{
    // Parsed query over the whole text. It used to be parsed and executed once per word with
    // identical results, so it now runs once (only if there is at least one non-empty word).
    QueryParser parser = new QueryParser(AppLuceneVersion, "name", analyzer);
    Query parsedQuery = parser.Parse(query);
    if (words.Any(w => !String.IsNullOrEmpty(w)))
    {
        AppendLuceneHits(parsedQuery, results);
    }

    // Search for the full title as a single term.
    var phrase = new MultiPhraseQuery();
    phrase.Add(new Term("name", query));
    AppendLuceneHits(phrase, results);

    // Search for parts of words.
    foreach (var word in words)
    {
        if (!String.IsNullOrEmpty(word))
        {
            AppendLuceneHits(new WildcardQuery(new Term("name", word)), results);
        }
    }

    // Search by year combined with a word.
    int year;
    if (TryExtractYear(words, out year))
    {
        foreach (var word in words)
        {
            if (String.IsNullOrEmpty(word))
            {
                continue;
            }

            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.Add(new WildcardQuery(new Term("name", word)), Occur.MUST);
            booleanQuery.Add(NumericRangeQuery.NewInt32Range("year", 1, year, year, true, true), Occur.MUST);
            AppendLuceneHits(booleanQuery, results);
        }
    }
}

/// <summary>
/// Executes <paramref name="luceneQuery"/> (top 10) and appends one formatted line per hit.
/// </summary>
private void AppendLuceneHits(Query luceneQuery, List<string> results)
{
    foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
    {
        var foundDoc = searcher.Doc(hit.Doc);
        var score = hit.Score;
        results.Add("Score: " + score
            + " ID: " + foundDoc.GetField("id").GetInt32Value().ToString()
            + " YEAR: " + foundDoc.GetField("year").GetInt32Value().ToString()
            + " NAME: " + foundDoc.GetValues("name")[0]);
    }
}
/// <summary>
/// Visits a <see cref="MultiPhraseQuery"/>. This implementation does not support that query type
/// and always throws.
/// </summary>
/// <param name="multiPhraseq">The query being visited; not used.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="SnNotSupportedException">Always thrown.</exception>
public virtual Query VisitMultiPhraseQuery(MultiPhraseQuery multiPhraseq)
{
    throw new SnNotSupportedException();
}
/// <summary>
/// Runs a bill search driven by the current HTTP request ("session", "q", "start", "count",
/// "chamber", "status", "sponsor", "cosponsor", "sort").
/// </summary>
/// <remarks>
/// The query is tried, in order, as an exact bill number, a public law number, and a U.S.C.
/// reference; only when none of these produced a match is the Lucene index for the session
/// searched. In non-API mode some matches redirect the response and return <c>null</c>.
/// </remarks>
/// <param name="api">
/// <c>true</c> for API callers (default page size 1000, no redirects on a bill-number match);
/// <c>false</c> for the web UI (default page size 30).
/// </param>
/// <returns>
/// A table with "count", "method" and "results" entries; a table with only "count" = 0 when
/// nothing matched or the query could not be parsed; or <c>null</c> after a redirect.
/// </returns>
public static Hashtable Search(bool api)
{
    BillType type;
    int number;

    HttpRequest request = HttpContext.Current.Request;

    int session = -1;
    if (request["session"] != null && request["session"] != "")
    {
        session = int.Parse(request["session"]);
    }

    string q = request["q"];

    int start = 0, count = (!api ? 30 : 1000);
    if (request["start"] != null)
    {
        start = int.Parse(request["start"]);
    }
    if (request["count"] != null)
    {
        count = int.Parse(request["count"]);
    }

    BooleanQuery query = new BooleanQuery();

    Hashtable no_results = new Hashtable();
    no_results["count"] = 0;

    // Queries containing a wildcard character are rejected outright.
    if (q != null && q.IndexOf("*") > -1)
    {
        return no_results;
    }

    // "text/NNN": treat a short numeric suffix after a slash as the session and redirect.
    if (!api && session == -1 && q != null)
    {
        int slash = q.IndexOf('/');
        if (slash >= q.Length - 4 && slash > 2)
        {
            try
            {
                session = int.Parse(q.Substring(slash + 1));

                // and if that worked...
                q = q.Substring(0, slash);
                HttpContext.Current.Response.Redirect("billsearch.xpd?session=" + session + "&q=" + HttpUtility.UrlEncode(q));
                return null;
            }
            catch
            {
                // Best effort: the suffix was not a session number, continue with a normal search.
            }
        }
    }

    if (session == -1)
    {
        session = Util.CurrentSession;
    }

    string search_method = "search";

    ArrayList specs = new ArrayList();
    Hashtable scores = new Hashtable();

    // Match a bill number exactly
    if (q != null && Bills.ParseID(q, out type, out number))
    {
        if (!api)
        {
            // Redirect the user right to the bill page.
            // Don't even check if bill exists.
            HttpContext.Current.Response.Redirect(Bills.BillLink2(session, type, number));
            return null;
        }
        else
        {
            search_method = "search by bill number";
            scores[session + EnumsConv.BillTypeToString(type) + number] = 1.0F;
            specs.Add(new Database.AndSpec(
                new Database.SpecEQ("session", session),
                new Database.SpecEQ("type", EnumsConv.BillTypeToString(type)),
                new Database.SpecEQ("number", number)));
        }
    }

    // Match public law number exactly
    if (!api && q != null && (q.StartsWith("P.L.") || q.StartsWith("PL")))
    {
        try
        {
            string num = null;
            if (q.StartsWith("P.L."))
            {
                num = q.Substring(4);
            }
            if (q.StartsWith("PL"))
            {
                num = q.Substring(2);
            }
            num = num.Replace(" ", "");

            // The part before the dash is used as the session to look the law up in.
            int dash = num.IndexOf('-');
            int s = int.Parse(num.Substring(0, dash));

            TableRow bill = Util.Database.DBSelectFirst("billindex", "session, type, number",
                new Database.SpecEQ("idx", "publiclawnumber"),
                new Database.SpecEQ("session", s),
                new Database.SpecEQ("value", num));

            if (bill != null)
            {
                if (!api)
                {
                    HttpContext.Current.Response.Redirect(Bills.BillLink3((int)bill["session"], (string)bill["type"], (int)bill["number"]));
                    return null;
                }
                else
                {
                    search_method = "search by public law number";
                    scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
                    specs.Add(new Database.AndSpec(
                        new Database.SpecEQ("session", (int)bill["session"]),
                        new Database.SpecEQ("type", (string)bill["type"]),
                        new Database.SpecEQ("number", (int)bill["number"])));
                }
            }
        }
        catch
        {
            // Best effort: a malformed public law number simply falls through to the other matchers.
        }
    }

    // Match USC reference
    Regex uscexp = new Regex(@"(\d[0-9A-Za-z\-]*)\s+U\.?S\.?C\.?\s+(\d[0-9A-Za-z\-]*)((\s*\([^\) ]+\))*)", RegexOptions.IgnoreCase);
    Match uscmc = (q == null ? null : uscexp.Match(q));
    if (uscmc != null && uscmc.Success)
    {
        string title = uscmc.Groups[1].Value;
        string section = uscmc.Groups[2].Value;
        string paragraph = uscmc.Groups[3].Value;

        // Turn "(a) (1)" into "_a__1"-style text: split on brackets/spaces, trim the empty
        // entries at both ends and join what is left with underscores.
        string[] ps = paragraph.Split('[', '(', ')', ' ');
        int psi = 0;
        while (psi < ps.Length - 1 && ps[psi] == "")
        {
            psi++;
        }
        int pse = ps.Length - 1;
        while (pse > 0 && ps[pse] == "")
        {
            pse--;
        }
        if (ps.Length != 0)
        {
            paragraph = "_" + String.Join("_", ps, psi, pse - psi + 1);
        }

        Table table = Util.Database.DBSelect("billusc", "session, type, number",
            new Database.SpecEQ("session", session),
            new Database.OrSpec(
                new Database.SpecEQ("ref", title + "_" + section + paragraph),
                new Database.SpecStartsWith("ref", title + "_" + section + paragraph + "_")));
        foreach (TableRow bill in table)
        {
            search_method = "search by U.S.C. section";
            scores[(int)bill["session"] + (string)bill["type"] + (int)bill["number"]] = 1.0F;
            specs.Add(new Database.AndSpec(
                new Database.SpecEQ("session", (int)bill["session"]),
                new Database.SpecEQ("type", (string)bill["type"]),
                new Database.SpecEQ("number", (int)bill["number"])));
        }
    }

    int total_count = -1;

    // Nothing matched exactly: fall back to the full-text index.
    if (specs.Count == 0)
    {
        if (q != null && q.Trim() != "")
        {
            BooleanQuery query1 = new BooleanQuery();
            query.Add(query1, BooleanClause.Occur.MUST);
            try
            {
                // The same text is searched in four fields, titles boosted over summary/full text.
                Query query_titles2 = new QueryParser("shorttitles", new StandardAnalyzer()).Parse(q);
                query_titles2.SetBoost((float)3);
                query1.Add(query_titles2, BooleanClause.Occur.SHOULD);

                Query query_titles1 = new QueryParser("officialtitles", new StandardAnalyzer()).Parse(q);
                query_titles1.SetBoost((float)2);
                query1.Add(query_titles1, BooleanClause.Occur.SHOULD);

                Query query_summary = new QueryParser("summary", new StandardAnalyzer()).Parse(q);
                query1.Add(query_summary, BooleanClause.Occur.SHOULD);

                Query query_text = new QueryParser("fulltext", new StandardAnalyzer()).Parse(q);
                query1.Add(query_text, BooleanClause.Occur.SHOULD);
            }
            catch (Exception)
            {
                // An unparsable query is reported as "no results" rather than as an error.
                return no_results;
            }
        }

        string chamber = request["chamber"];
        string[] status = request["status"] == null ? null : request["status"].Split(',');
        string sponsor = request["sponsor"];
        string cosponsor = request["cosponsor"];

        if (chamber != null && (chamber == "s" || chamber == "h"))
        {
            query.Add(new WildcardQuery(new Term("type", chamber + "*")), BooleanClause.Occur.MUST);
        }
        if (status != null && status[0] != "")
        {
            // All requested states are added as alternatives at one position of the phrase.
            List<Term> terms = new List<Term>();
            foreach (string s in status)
            {
                terms.Add(new Term("state", s));
            }
            MultiPhraseQuery mpq = new MultiPhraseQuery();
            mpq.Add(terms.ToArray());
            query.Add(mpq, BooleanClause.Occur.MUST);
        }
        if (sponsor != null && sponsor != "")
        {
            query.Add(new TermQuery(new Term("sponsor", sponsor)), BooleanClause.Occur.MUST);
        }
        if (cosponsor != null && cosponsor != "")
        {
            query.Add(new TermQuery(new Term("cosponsor", cosponsor)), BooleanClause.Occur.MUST);
        }

        IndexSearcher searcher = new IndexSearcher(Util.DataPath + Path.DirectorySeparatorChar + session + Path.DirectorySeparatorChar + "index.bills.lucene");
        try
        {
            Sort sort = null;
            if (request["sort"] != null && request["sort"] == "introduced")
            {
                sort = new Sort(new SortField("introduced", SortField.STRING, true));
            }
            if (request["sort"] != null && request["sort"] == "lastaction")
            {
                sort = new Sort(new SortField("lastaction", SortField.STRING, true));
            }

            Hits hits = searcher.Search(query, sort == null ? new Sort() : sort);

            int end = hits.Length();
            if (start + count < end)
            {
                end = start + count;
            }
            total_count = hits.Length();

            for (int i = start; i < end; i++)
            {
                Document doc = hits.Doc(i);
                string billsession = doc.Get("session");
                string billtype = doc.Get("type");
                string billnumber = doc.Get("number");

                int istatus = (int)EnumsConv.BillStatusFromString(doc.Get("status"));

                float score;
                if (sort == null)
                {
                    // readjust the score based on status
                    score = hits.Score(i) + istatus / (float)8 * (float).2;
                }
                else
                {
                    // keep order from Lucene
                    score = -i;
                }

                scores[billsession + billtype + billnumber] = score;
                specs.Add(new Database.AndSpec(
                    new Database.SpecEQ("session", billsession),
                    new Database.SpecEQ("type", billtype),
                    new Database.SpecEQ("number", billnumber)));
            }
        }
        finally
        {
            // The searcher holds the index open; release it once the hits have been read.
            searcher.Close();
        }

        // "sort=hits": replace the scores with the stored hit counters.
        if (request["sort"] != null && request["sort"] == "hits" && specs.Count > 0)
        {
            Table hitsinfo = Util.Database.DBSelect("billhits", "*",
                Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));
            foreach (TableRow billhits in hitsinfo)
            {
                scores["" + billhits["session"] + billhits["type"] + billhits["number"]] = (float)(int)billhits["hits1"];
            }
        }
    }

    if (specs.Count == 0)
    {
        return no_results;
    }

    Table billinfo = Util.Database.DBSelect("billstatus", "*",
        Database.OrSpec.New((Database.Spec[])specs.ToArray(typeof(Database.Spec))));

    if (total_count == -1)
    {
        total_count = billinfo.Rows;
    }

    ArrayList ret = new ArrayList();
    foreach (TableRow r in billinfo)
    {
        ret.Add(r);
    }

    // Order the rows by the scores collected above.
    BillHitComparer bhc = new BillHitComparer();
    bhc.scores = scores;
    ret.Sort(bhc);

    Hashtable ret2 = new Hashtable();
    ret2["count"] = total_count;
    ret2["method"] = search_method;
    ret2["results"] = ret;

    return ret2;
}
/// <summary>
/// Walks <paramref name="query"/> and records the terms it can match as
/// <see cref="WeightedSpanTerm"/>s in <paramref name="terms"/>.
/// </summary>
/// <remarks>
/// Boolean, filtered and disjunction-max queries are unwrapped recursively; phrase and
/// multi-phrase queries are converted to an equivalent <see cref="SpanNearQuery"/>; multi-term
/// queries are rewritten first when <c>expandMultiTermQuery</c> is set. Other query types are ignored.
/// </remarks>
/// <param name="query">Query to extract terms from.</param>
/// <param name="terms">Dictionary that receives the created weighted span terms.</param>
private void Extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
{
    if (query is BooleanQuery)
    {
        foreach (BooleanClause clause in ((BooleanQuery)query).GetClauses())
        {
            if (clause.IsProhibited)
            {
                continue;
            }
            Extract(clause.Query, terms);
        }
        return;
    }

    if (query is PhraseQuery)
    {
        var phrase = (PhraseQuery)query;
        Term[] phraseTerms = phrase.GetTerms();
        var termSpans = new SpanQuery[phraseTerms.Length];
        for (int k = 0; k < phraseTerms.Length; k++)
        {
            termSpans[k] = new SpanTermQuery(phraseTerms[k]);
        }

        int effectiveSlop = phrase.Slop;
        int[] termPositions = phrase.GetPositions();

        // add largest position increment to slop
        if (termPositions.Length > 0)
        {
            int widestGap = 0;
            for (int k = 1; k < termPositions.Length; k++)
            {
                widestGap = Math.Max(widestGap, termPositions[k] - termPositions[k - 1]);
            }
            if (widestGap > 1)
            {
                effectiveSlop += widestGap;
            }
        }

        var phraseSpan = new SpanNearQuery(termSpans, effectiveSlop, effectiveSlop == 0);
        phraseSpan.Boost = query.Boost;
        ExtractWeightedSpanTerms(terms, phraseSpan);
        return;
    }

    if (query is TermQuery)
    {
        ExtractWeightedTerms(terms, query);
        return;
    }

    if (query is SpanQuery)
    {
        ExtractWeightedSpanTerms(terms, (SpanQuery)query);
        return;
    }

    if (query is FilteredQuery)
    {
        Extract(((FilteredQuery)query).Query, terms);
        return;
    }

    if (query is DisjunctionMaxQuery)
    {
        foreach (var disjunct in ((DisjunctionMaxQuery)query))
        {
            Extract(disjunct, terms);
        }
        return;
    }

    if (query is MultiTermQuery && expandMultiTermQuery)
    {
        var multiTerm = (MultiTermQuery)query;
        if (multiTerm.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
        {
            // Work on a clone so the caller's query keeps its own rewrite method.
            multiTerm = (MultiTermQuery)multiTerm.Clone();
            multiTerm.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
            query = multiTerm;
        }

        // The fake reader only records which field the rewrite asks for.
        var fieldProbe = new FakeReader();
        MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fieldProbe, multiTerm);
        if (fieldProbe.Field != null)
        {
            IndexReader fieldReader = GetReaderForField(fieldProbe.Field);
            Extract(query.Rewrite(fieldReader), terms);
        }
        return;
    }

    if (query is MultiPhraseQuery)
    {
        var multiPhrase = (MultiPhraseQuery)query;
        IList<Term[]> alternativesPerSlot = multiPhrase.GetTermArrays();
        int[] slotPositions = multiPhrase.GetPositions();
        if (slotPositions.Length == 0)
        {
            return;
        }

        // Highest position used by any slot.
        int highest = slotPositions[slotPositions.Length - 1];
        foreach (int candidate in slotPositions)
        {
            if (candidate > highest)
            {
                highest = candidate;
            }
        }

        // Group the alternative terms by the position they occupy.
        var byPosition = new List<SpanQuery>[highest + 1];
        int occupied = 0;
        for (int slot = 0; slot < alternativesPerSlot.Count; ++slot)
        {
            Term[] alternatives = alternativesPerSlot[slot];
            int at = slotPositions[slot];
            if (byPosition[at] == null)
            {
                byPosition[at] = new List<SpanQuery>(alternatives.Length);
                ++occupied;
            }
            foreach (Term alternative in alternatives)
            {
                byPosition[at].Add(new SpanTermQuery(alternative));
            }
        }

        // One SpanOr clause per occupied position; unoccupied positions widen the slop.
        int gaps = 0;
        int next = 0;
        var orClauses = new SpanQuery[occupied];
        foreach (List<SpanQuery> bucket in byPosition)
        {
            if (bucket == null)
            {
                ++gaps;
                continue;
            }
            orClauses[next++] = new SpanOrQuery(bucket.ToArray());
        }

        int baseSlop = multiPhrase.Slop;
        var multiPhraseSpan = new SpanNearQuery(orClauses, baseSlop + gaps, baseSlop == 0);
        multiPhraseSpan.Boost = query.Boost;
        ExtractWeightedSpanTerms(terms, multiPhraseSpan);
    }
}
/// <summary>
/// Converts <paramref name="query"/> into span queries where possible and collects the payloads
/// of their matches into <paramref name="payloads"/> via <c>GetPayloads</c>.
/// </summary>
/// <remarks>
/// Boolean, filtered and disjunction-max queries are unwrapped recursively; term, phrase and
/// multi-phrase queries are translated to span queries. Other query types are ignored.
/// </remarks>
/// <param name="query">Query to convert.</param>
/// <param name="payloads">Collection that receives the payloads found.</param>
private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
{
    if (query is BooleanQuery)
    {
        BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();
        for (int i = 0; i < queryClauses.Length; i++)
        {
            if (!queryClauses[i].IsProhibited)
            {
                QueryToSpanQuery(queryClauses[i].Query, payloads);
            }
        }
    }
    else if (query is PhraseQuery)
    {
        Term[] phraseQueryTerms = ((PhraseQuery)query).GetTerms();
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
        for (int i = 0; i < phraseQueryTerms.Length; i++)
        {
            clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
        }

        // An exact phrase (slop 0) must match in order.
        int slop = ((PhraseQuery)query).Slop;
        bool inorder = slop == 0;

        SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
        sp.Boost = query.Boost;
        GetPayloads(payloads, sp);
    }
    else if (query is TermQuery)
    {
        SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).Term);
        stq.Boost = query.Boost;
        GetPayloads(payloads, stq);
    }
    else if (query is SpanQuery)
    {
        GetPayloads(payloads, (SpanQuery)query);
    }
    else if (query is FilteredQuery)
    {
        QueryToSpanQuery(((FilteredQuery)query).Query, payloads);
    }
    else if (query is DisjunctionMaxQuery)
    {
        // foreach disposes the enumerator, which the previous manual loop did not.
        foreach (Query disjunct in (DisjunctionMaxQuery)query)
        {
            QueryToSpanQuery(disjunct, payloads);
        }
    }
    else if (query is MultiPhraseQuery)
    {
        MultiPhraseQuery mpq = (MultiPhraseQuery)query;
        IList<Term[]> termArrays = mpq.GetTermArrays();
        int[] positions = mpq.GetPositions();
        if (positions.Length > 0)
        {
            int maxPosition = positions[positions.Length - 1];
            for (int i = 0; i < positions.Length - 1; ++i)
            {
                if (positions[i] > maxPosition)
                {
                    maxPosition = positions[i];
                }
            }

            // Typed as List<SpanQuery> so ToArray() yields a real SpanQuery[]. The previous
            // IList<Query> version cast a Query[] to SpanQuery[], which fails at runtime
            // with InvalidCastException.
            List<SpanQuery>[] disjunctLists = new List<SpanQuery>[maxPosition + 1];
            int distinctPositions = 0;

            for (int i = 0; i < termArrays.Count; ++i)
            {
                Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null)
                {
                    disjuncts = (disjunctLists[positions[i]] = new List<SpanQuery>(termArray.Length));
                    ++distinctPositions;
                }
                foreach (Term term in termArray)
                {
                    disjuncts.Add(new SpanTermQuery(term));
                }
            }

            // One SpanOr clause per occupied position; unoccupied positions widen the slop.
            int positionGaps = 0;
            int position = 0;
            SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (int i = 0; i < disjunctLists.Length; ++i)
            {
                List<SpanQuery> disjuncts = disjunctLists[i];
                if (disjuncts != null)
                {
                    clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
                }
                else
                {
                    ++positionGaps;
                }
            }

            int slop = mpq.Slop;
            bool inorder = (slop == 0);

            SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
            sp.Boost = query.Boost;
            GetPayloads(payloads, sp);
        }
    }
}
/// <summary>
/// Demo entry point: indexes three JSON records into an in-memory Lucene index, then prints the
/// number of records (and their stored JSON payload) whose "app_data" contains "12355" or "89700".
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var jsonProps = "[{\"name\":\"Ondrej\",\"surname\":\"Kubicek\",\"app_data\":[\"112233\", \"56612\"]}," +
                    "{\"name\":\"Lukas\",\"surname\":\"Bily\",\"app_data\":[\"12355\", \"112233\", \"89466\"]}," +
                    "{\"name\":\"Lenak\",\"surname\":\"Nejaka\",\"app_data\":[\"89700\"]}]";

    var version = LuceneVersion.LUCENE_48;

    // Everything below owns resources, so it is disposed deterministically (in reverse order).
    using (var dir = new RAMDirectory())
    using (var analyzer = new StandardAnalyzer(version))
    using (var writer = new IndexWriter(dir, new IndexWriterConfig(version, analyzer)))
    using (var d = JsonDocument.Parse(jsonProps))
    {
        foreach (var line in d.RootElement.EnumerateArray())
        {
            var doc = new Document();
            doc.Add(new StringField("name", line.GetProperty("name").GetString(), Field.Store.NO));
            doc.Add(new StringField("surname", line.GetProperty("surname").GetString(), Field.Store.NO));

            // One "app_data" field instance per array element makes the field multi-valued.
            foreach (var f in line.GetProperty("app_data").EnumerateArray())
            {
                doc.Add(new StringField("app_data", f.GetString(), Field.Store.NO));
            }

            // The whole record is stored so it can be printed for each hit.
            doc.Add(new StringField("payload", line.ToString(), Field.Store.YES));
            writer.AddDocument(doc);
        }

        writer.Flush(false, false);

        using (var reader = writer.GetReader(true))
        {
            var searcher = new IndexSearcher(reader);

            var booleanQuery = new BooleanQuery();
            booleanQuery.Add(new TermQuery(new Term("app_data", "12355")), Occur.SHOULD);
            booleanQuery.Add(new TermQuery(new Term("app_data", "89700")), Occur.SHOULD);

            var res = searcher.Search(booleanQuery, 100);
            Console.WriteLine(res.TotalHits);
            foreach (var hit in res.ScoreDocs)
            {
                var item = searcher.Doc(hit.Doc);
                Console.WriteLine(item.Get("payload"));
            }
        }
    }
}
/// <summary>
/// Searches the index in <paramref name="directoryInfo"/> for <paramref name="textSearcher"/> in
/// the "FullRequest" field, first as a sloppy phrase and then through the query parser, and
/// appends the distinct "ObjetcId" values of the hits to <paramref name="lReturn"/>.
/// </summary>
/// <param name="directoryInfo">Directory containing the Lucene index.</param>
/// <param name="textSearcher">Text to search for (only read).</param>
/// <param name="lReturn">List that receives the distinct object ids found.</param>
private void GetDataIndexId(DirectoryInfo directoryInfo, ref string textSearcher, ref List<string> lReturn)
{
    using (Directory directory = FSDirectory.Open(directoryInfo))
    using (Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30))
    using (IndexReader indexReader = IndexReader.Open(directory, true))
    using (Searcher indexSearcher = new IndexSearcher(indexReader))
    {
        List<string> listTemp = new List<string>();
        char[] delimiterChars = { ' ', ',', '.', ':', '\t' };

        // Adjacent delimiters ("a, b") used to produce empty words, which became empty phrase
        // terms that can never match; they are dropped here.
        string[] words = textSearcher.Split(delimiterChars, System.StringSplitOptions.RemoveEmptyEntries);
        if (words.Length > 0)
        {
            // Phrase search over the individual words, allowing a slop of 3.
            MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
            multiPhraseQuery.Slop = 3;
            foreach (var word in words)
            {
                multiPhraseQuery.Add(new Term("FullRequest", word));
            }
            CollectObjectIds(indexSearcher, multiPhraseQuery, listTemp);
        }

        // Query-parser search over the raw text.
        var queryParser = new QueryParser(Version.LUCENE_30, "FullRequest", analyzer);
        CollectObjectIds(indexSearcher, queryParser.Parse(textSearcher), listTemp);

        lReturn.AddRange(listTemp.Distinct());
    }
}

/// <summary>
/// Runs <paramref name="query"/> (top 100 hits) and appends each hit's non-empty "ObjetcId"
/// field value to <paramref name="objectIds"/>.
/// </summary>
private static void CollectObjectIds(Searcher searcher, Query query, List<string> objectIds)
{
    TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);
    searcher.Search(query, collector);

    foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
    {
        string objectId = searcher.Doc(scoreDoc.Doc).Get("ObjetcId");
        if (!string.IsNullOrEmpty(objectId))
        {
            objectIds.Add(objectId);
        }
    }
}
/// <summary>
/// Creates a query from the analysis chain.
/// <para/>
/// Expert: this is more useful for subclasses such as queryparsers.
/// If using this class directly, just use <see cref="CreateBooleanQuery(string, string)"/>
/// and <see cref="CreatePhraseQuery(string, string)"/>.
/// <para/>
/// The text is analyzed once to count tokens and positions, then the cached tokens are replayed
/// to build: nothing (no tokens), a term query (one token), a boolean query (not quoted, or all
/// tokens at one position), a <see cref="MultiPhraseQuery"/> (quoted with several tokens at the
/// same position) or a <see cref="PhraseQuery"/> (quoted, one token per position).
/// </summary>
/// <param name="analyzer"> Analyzer used for this query. </param>
/// <param name="operator"> Default boolean operator used for this query. </param>
/// <param name="field"> Field to create queries against. </param>
/// <param name="queryText"> Text to be passed to the analysis chain. </param>
/// <param name="quoted"> <c>true</c> if phrases should be generated when terms occur at more than one position. </param>
/// <param name="phraseSlop"> Slop factor for phrase/multiphrase queries. </param>
/// <returns> The created query, or <c>null</c> when analysis produced no tokens. </returns>
protected Query CreateFieldQuery(Analyzer analyzer, Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
{
    Debug.Assert(@operator == Occur.SHOULD || @operator == Occur.MUST);
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    CachingTokenFilter buffer = null;
    ITermToBytesRefAttribute termAtt = null;
    IPositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;
    int positionCount = 0;
    bool severalTokensAtSamePosition = false;
    bool hasMoreTokens = false;

    TokenStream source = null;
    try
    {
        source = analyzer.GetTokenStream(field, new StringReader(queryText));
        source.Reset();
        buffer = new CachingTokenFilter(source);
        buffer.Reset();

        if (buffer.HasAttribute <ITermToBytesRefAttribute>())
        {
            termAtt = buffer.GetAttribute <ITermToBytesRefAttribute>();
        }
        if (buffer.HasAttribute <IPositionIncrementAttribute>())
        {
            posIncrAtt = buffer.GetAttribute <IPositionIncrementAttribute>();
        }

        // First pass: count the tokens and positions, and detect stacked tokens
        // (position increment 0).
        if (termAtt != null)
        {
            try
            {
                hasMoreTokens = buffer.IncrementToken();
                while (hasMoreTokens)
                {
                    numTokens++;
                    int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                    if (positionIncrement != 0)
                    {
                        positionCount += positionIncrement;
                    }
                    else
                    {
                        severalTokensAtSamePosition = true;
                    }
                    hasMoreTokens = buffer.IncrementToken();
                }
            }
            catch (System.IO.IOException)
            {
                // ignore: whatever was counted before the failure is used
            }
        }
    }
    catch (System.IO.IOException e)
    {
        throw new Exception("Error analyzing query text", e);
    }
    finally
    {
        // NOTE(review): the source stream is disposed here although buffer is read again below;
        // presumably CachingTokenFilter replays the tokens it cached in the first pass - confirm.
        IOUtils.DisposeWhileHandlingException(source);
    }

    // rewind the buffer stream
    buffer.Reset();

    // bytes is refilled by each termAtt.FillBytesRef() call below, hence the DeepCopyOf
    // whenever a Term is created from it.
    BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;

    if (numTokens == 0)
    {
        return(null);
    }
    else if (numTokens == 1)
    {
        try
        {
            bool hasNext = buffer.IncrementToken();
            Debug.Assert(hasNext == true);
            termAtt.FillBytesRef();
        }
        catch (System.IO.IOException)
        {
            // safe to ignore, because we know the number of tokens
        }
        return(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))));
    }
    else
    {
        if (severalTokensAtSamePosition || (!quoted))
        {
            if (positionCount == 1 || (!quoted))
            {
                // no phrase query:

                if (positionCount == 1)
                {
                    // simple case: only one position, with synonyms
                    BooleanQuery q = NewBooleanQuery(true);
                    for (int i = 0; i < numTokens; i++)
                    {
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }
                        Query currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        q.Add(currentQuery, Occur.SHOULD);
                    }
                    return(q);
                }
                else
                {
                    // multiple positions
                    BooleanQuery q = NewBooleanQuery(false);
                    // Query for the position currently being assembled; it is added to q only
                    // when the next position starts (or after the loop).
                    Query currentQuery = null;
                    for (int i = 0; i < numTokens; i++)
                    {
                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }
                        if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                        {
                            // Same position as the previous token: collect the alternatives in
                            // a nested boolean query of SHOULD clauses.
                            if (!(currentQuery is BooleanQuery))
                            {
                                Query t = currentQuery;
                                currentQuery = NewBooleanQuery(true);
                                ((BooleanQuery)currentQuery).Add(t, Occur.SHOULD);
                            }
                            ((BooleanQuery)currentQuery).Add(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))), Occur.SHOULD);
                        }
                        else
                        {
                            // New position: flush the previous one using the default operator.
                            if (currentQuery != null)
                            {
                                q.Add(currentQuery, @operator);
                            }
                            currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                    }
                    q.Add(currentQuery, @operator);
                    return(q);
                }
            }
            else
            {
                // phrase query:
                MultiPhraseQuery mpq = NewMultiPhraseQuery();
                mpq.Slop = phraseSlop;
                // Terms collected for the current position.
                IList <Term> multiTerms = new List <Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++)
                {
                    int positionIncrement = 1;
                    try
                    {
                        bool hasNext = buffer.IncrementToken();
                        Debug.Assert(hasNext == true);
                        termAtt.FillBytesRef();
                        if (posIncrAtt != null)
                        {
                            positionIncrement = posIncrAtt.PositionIncrement;
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // safe to ignore, because we know the number of tokens
                    }

                    // The token starts a new position: emit the terms gathered for the previous one.
                    if (positionIncrement > 0 && multiTerms.Count > 0)
                    {
                        if (enablePositionIncrements)
                        {
                            mpq.Add(multiTerms.ToArray(), position);
                        }
                        else
                        {
                            mpq.Add(multiTerms.ToArray());
                        }
                        multiTerms.Clear();
                    }
                    position += positionIncrement;
                    multiTerms.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                }
                // Emit the terms of the last position.
                if (enablePositionIncrements)
                {
                    mpq.Add(multiTerms.ToArray(), position);
                }
                else
                {
                    mpq.Add(multiTerms.ToArray());
                }
                return(mpq);
            }
        }
        else
        {
            // Quoted text with exactly one token per position: a plain phrase query.
            PhraseQuery pq = NewPhraseQuery();
            pq.Slop = phraseSlop;
            int position = -1;

            for (int i = 0; i < numTokens; i++)
            {
                int positionIncrement = 1;

                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                    termAtt.FillBytesRef();
                    if (posIncrAtt != null)
                    {
                        positionIncrement = posIncrAtt.PositionIncrement;
                    }
                }
                catch (System.IO.IOException)
                {
                    // safe to ignore, because we know the number of tokens
                }

                if (enablePositionIncrements)
                {
                    position += positionIncrement;
                    pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)), position);
                }
                else
                {
                    pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                }
            }
            return(pq);
        }
    }
}
/// <summary>
/// Visits a <see cref="MultiPhraseQuery"/>. This base implementation does no work and always throws.
/// </summary>
/// <param name="multiPhraseq">The query being visited; it is not inspected.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public virtual Query VisitMultiPhraseQuery(MultiPhraseQuery multiPhraseq) =>
    throw new NotImplementedException();
/// <summary>
/// Rejects <see cref="MultiPhraseQuery"/> nodes: this override always throws.
/// </summary>
/// <param name="multiPhraseq">The query being visited; it is not inspected.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="SnNotSupportedException">Always thrown.</exception>
public override Query VisitMultiPhraseQuery(MultiPhraseQuery multiPhraseq) =>
    throw new SnNotSupportedException();
/// <summary>
/// Checks that a <see cref="QueryBuilder"/> using <c>MockCJKSynonymAnalyzer</c> turns the text "中国"
/// into a <see cref="MultiPhraseQuery"/> of "中" followed by the alternatives "国" / "國",
/// first with the default slop and then with a slop of 3.
/// </summary>
public virtual void TestCJKSynonymsPhrase()
{
    Term[] secondPositionAlternatives =
    {
        new Term("field", "国"),
        new Term("field", "國"),
    };

    MultiPhraseQuery expectedQuery = new MultiPhraseQuery();
    expectedQuery.Add(new Term("field", "中"));
    expectedQuery.Add(secondPositionAlternatives);

    QueryBuilder queryBuilder = new QueryBuilder(new MockCJKSynonymAnalyzer());

    // Two-argument overload: compared against the expected query with its slop left untouched.
    var builtWithoutSlop = queryBuilder.CreatePhraseQuery("field", "中国");
    Assert.AreEqual(expectedQuery, builtWithoutSlop);

    // Three-argument overload: the same expected query, now carrying Slop = 3.
    expectedQuery.Slop = 3;
    var builtWithSlop = queryBuilder.CreatePhraseQuery("field", "中国", 3);
    Assert.AreEqual(expectedQuery, builtWithSlop);
}
/// <summary>
/// Checks that a <see cref="QueryBuilder"/> using <c>MockSynonymAnalyzer</c> turns the text "old dogs"
/// into a <see cref="MultiPhraseQuery"/> of "old" followed by the alternatives "dogs" / "dog".
/// </summary>
public virtual void TestSynonymsPhrase()
{
    Term[] secondPositionAlternatives =
    {
        new Term("field", "dogs"),
        new Term("field", "dog"),
    };

    MultiPhraseQuery expectedQuery = new MultiPhraseQuery();
    expectedQuery.Add(new Term("field", "old"));
    expectedQuery.Add(secondPositionAlternatives);

    QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
    var builtQuery = queryBuilder.CreatePhraseQuery("field", "old dogs");

    Assert.AreEqual(expectedQuery, builtQuery);
}
/// <summary>
/// Builds the content query for a "match anywhere" search over the space-separated words of
/// <paramref name="expandedSearchString"/>.
/// </summary>
/// <remarks>
/// A single word becomes a <see cref="TermQuery"/> when it is one punctuation character, otherwise a
/// <c>*word*</c> <see cref="WildcardQuery"/>. Several words become a <see cref="MultiPhraseQuery"/>:
/// the first and last positions use the terms gathered by <c>CollectFirstAndLastTermMatches</c> when it
/// returns any, and fall back to the literal word otherwise; the words in between are always literal.
/// </remarks>
/// <param name="indexReader">Reader passed through to <c>CollectFirstAndLastTermMatches</c>.</param>
/// <param name="expandedSearchString">Search text; split on single spaces, empty entries dropped.</param>
/// <param name="matchCase">
/// When <c>false</c>, the text is lower-cased and the case-insensitive content field is searched.
/// </param>
/// <returns>The query to run against the selected content field.</returns>
private Query BuildMatchAnywhereQuery(IndexReader indexReader, string expandedSearchString, bool matchCase)
{
    string field;
    string searchText;
    if (matchCase)
    {
        field = Constants.IndexFields.Content;
        searchText = expandedSearchString;
    }
    else
    {
        field = Constants.IndexFields.ContentCaseInsensitive;
        searchText = expandedSearchString.ToLower();
    }

    List<string> words = searchText
        .Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .ToList();
    string leadingWord = words.FirstOrDefault();
    string trailingWord = words.LastOrDefault();

    if (words.Count == 1)
    {
        bool isSinglePunctuationChar =
            leadingWord.Length == 1 && LuceneHelper.IsPunctuation(leadingWord.First());

        if (isSinglePunctuationChar)
        {
            return new TermQuery(new Term(field, leadingWord));
        }

        return new WildcardQuery(new Term(field, "*" + leadingWord + "*"));
    }

    List<Term> leadingMatches = new List<Term>();
    List<Term> trailingMatches = new List<Term>();
    CollectFirstAndLastTermMatches(indexReader, field, leadingMatches, trailingMatches, leadingWord, trailingWord);

    bool hasLeadingMatches = leadingMatches.Count > 0;
    bool hasTrailingMatches = trailingMatches.Count > 0;

    MultiPhraseQuery phrase = new MultiPhraseQuery();
    if (hasLeadingMatches)
    {
        phrase.Add(leadingMatches.ToArray());
    }

    // Literal words: skip the first/last word only when collected matches stand in for it.
    int firstLiteral = hasLeadingMatches ? 1 : 0;
    int literalEnd = hasTrailingMatches ? words.Count - 1 : words.Count;
    for (int index = firstLiteral; index < literalEnd; index++)
    {
        phrase.Add(new Term(field, words[index]));
    }

    if (hasTrailingMatches)
    {
        phrase.Add(trailingMatches.ToArray());
    }

    return phrase;
}
/// <summary>
/// Override for <see cref="MultiPhraseQuery"/> nodes that is not implemented: it always throws.
/// </summary>
/// <param name="multiPhraseq">The query being visited; it is not inspected.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public override Query VisitMultiPhraseQuery(MultiPhraseQuery multiPhraseq) =>
    throw new NotImplementedException();
/// <summary>
/// Click handler for the Lucene search button: runs several search passes over the text typed into
/// <c>search_field</c> and lists every distinct matching document in the <c>results</c> grid.
/// </summary>
/// <remarks>
/// Passes, in order:
/// (1) the whole text parsed by a <see cref="QueryParser"/> on the "name" field;
/// (2) the whole lower-cased text as one "name" term;
/// (3) a <c>*word*</c> wildcard on "name" for each word;
/// (4) only when one of the words is a number in 1801..9999: for each remaining word, a
///     "wildcard on name OR year equals that number" query.
/// Every pass asks for at most 10 hits, and documents are de-duplicated by their "id" field.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void lucene_serach_Click(object sender, EventArgs e)
{
    results.Rows.Clear();

    var query = search_field.Text.ToLower();

    // Blank entries (from repeated spaces) are ignored by every pass, so drop them once here.
    var words = query.Split(' ').Where(w => !String.IsNullOrEmpty(w)).ToList();

    var totalResults = new List<Document>();
    var seenIds = new HashSet<int?>();

    // The reader handed out by the writer has to be disposed by the caller.
    using (var reader = writer1.GetReader(applyAllDeletes: true))
    {
        var searcher = new IndexSearcher(reader);

        // Runs one query (top 10) and keeps the documents whose id has not been seen yet.
        void Collect(Query luceneQuery)
        {
            foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                if (seenIds.Add(foundDoc.GetField("id").GetInt32Value()))
                {
                    totalResults.Add(foundDoc);
                }
            }
        }

        // Pass 1: parsed query. The parsed query is the same for every word, so it is
        // searched once, and not parsed at all when the input holds no words.
        if (words.Count > 0)
        {
            QueryParser parser = new QueryParser(AppLuceneVersion, "name", analyzer);
            Collect(parser.Parse(query));
        }

        // Pass 2: full name as a single term.
        var phrase = new MultiPhraseQuery();
        phrase.Add(new Term("name", query));
        Collect(phrase);

        // Pass 3: parts of words.
        foreach (var word in words)
        {
            Collect(new WildcardQuery(new Term("name", "*" + word + "*")));
        }

        // Pass 4: year plus part of a word. Only a number inside the accepted range counts as a
        // year; a number outside the range must not trigger this pass.
        int? year = null;
        for (var i = 0; i < words.Count; i++)
        {
            if (TryParse(words[i], out int candidate) && candidate > 1800 && candidate <= 9999)
            {
                year = candidate;
                words.RemoveAt(i);
                break;
            }
        }

        Console.WriteLine(year.HasValue);

        if (year.HasValue)
        {
            var number = year.Value;
            foreach (var word in words)
            {
                var booleanQuery = new BooleanQuery();
                var wild = new WildcardQuery(new Term("name", "*" + word + "*"));
                var num = NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true);
                booleanQuery.Add(wild, Occur.SHOULD);
                booleanQuery.Add(num, Occur.SHOULD);
                Collect(booleanQuery);
            }
        }
    }

    foreach (var doc in totalResults)
    {
        results.Rows.Add(
            doc.GetField("id").GetInt32Value().ToString(),
            doc.GetValues("name")[0],
            doc.GetField("year").GetInt32Value().ToString());
    }
}
/// <summary>
/// Translates <paramref name="query"/> into span queries and hands each of them to
/// <c>GetPayloads</c> together with <paramref name="payloads"/>.
/// </summary>
/// <remarks>
/// Handled query types: <see cref="BooleanQuery"/> (recurses into every non-prohibited clause),
/// <see cref="PhraseQuery"/>, <see cref="TermQuery"/>, <see cref="SpanQuery"/>,
/// <see cref="FilteredQuery"/>, <see cref="DisjunctionMaxQuery"/> and <see cref="MultiPhraseQuery"/>.
/// Any other query type is ignored.
/// </remarks>
/// <param name="query">The query to translate.</param>
/// <param name="payloads">Collection passed on to <c>GetPayloads</c>.</param>
private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
{
    if (query is BooleanQuery)
    {
        BooleanClause[] queryClauses = ((BooleanQuery)query).GetClauses();

        for (int i = 0; i < queryClauses.Length; i++)
        {
            if (!queryClauses[i].IsProhibited())
            {
                QueryToSpanQuery(queryClauses[i].GetQuery(), payloads);
            }
        }
    }
    else if (query is PhraseQuery)
    {
        Term[] phraseQueryTerms = ((PhraseQuery)query).GetTerms();
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
        for (int i = 0; i < phraseQueryTerms.Length; i++)
        {
            clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
        }

        int slop = ((PhraseQuery)query).GetSlop();
        // An exact phrase (slop 0) is the only case that is matched in order.
        bool inorder = (slop == 0);

        SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
        sp.SetBoost(query.GetBoost());
        GetPayloads(payloads, sp);
    }
    else if (query is TermQuery)
    {
        SpanTermQuery stq = new SpanTermQuery(((TermQuery)query).GetTerm());
        stq.SetBoost(query.GetBoost());
        GetPayloads(payloads, stq);
    }
    else if (query is SpanQuery)
    {
        GetPayloads(payloads, (SpanQuery)query);
    }
    else if (query is FilteredQuery)
    {
        QueryToSpanQuery(((FilteredQuery)query).GetQuery(), payloads);
    }
    else if (query is DisjunctionMaxQuery)
    {
        for (System.Collections.IEnumerator iterator = ((DisjunctionMaxQuery)query).Iterator(); iterator.MoveNext();)
        {
            QueryToSpanQuery((Query)iterator.Current, payloads);
        }
    }
    else if (query is MultiPhraseQuery)
    {
        MultiPhraseQuery mpq = (MultiPhraseQuery)query;
        System.Collections.IList termArrays = mpq.GetTermArrays();
        int[] positions = mpq.GetPositions();
        if (positions.Length > 0)
        {
            // Positions are not assumed to be sorted, so scan for the largest one.
            int maxPosition = positions[positions.Length - 1];
            for (int i = 0; i < positions.Length - 1; ++i)
            {
                if (positions[i] > maxPosition)
                {
                    maxPosition = positions[i];
                }
            }

            // One (possibly null) list of alternative span queries per position.
            System.Collections.ArrayList[] disjunctLists = new System.Collections.ArrayList[maxPosition + 1];
            int distinctPositions = 0;

            for (int i = 0; i < termArrays.Count; ++i)
            {
                Term[] termArray = (Term[])termArrays[i];
                System.Collections.IList disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null)
                {
                    disjuncts = (disjunctLists[positions[i]] = new System.Collections.ArrayList(termArray.Length));
                    ++distinctPositions;
                }
                for (int j = 0; j < termArray.Length; ++j)
                {
                    disjuncts.Add(new SpanTermQuery(termArray[j]));
                }
            }

            int positionGaps = 0;
            int position = 0;
            SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (int i = 0; i < disjunctLists.Length; ++i)
            {
                System.Collections.ArrayList disjuncts = disjunctLists[i];
                if (disjuncts != null)
                {
                    // ArrayList.ToArray(Type) expects the ELEMENT type. Passing typeof(SpanQuery[])
                    // would allocate a SpanQuery[][] and fail while copying the SpanTermQuery items.
                    clauses[position++] = new SpanOrQuery((SpanQuery[])disjuncts.ToArray(typeof(SpanQuery)));
                }
                else
                {
                    ++positionGaps;
                }
            }

            int slop = mpq.GetSlop();
            bool inorder = (slop == 0);

            // Unoccupied positions are added to the slop of the resulting near query.
            SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
            sp.SetBoost(query.GetBoost());
            GetPayloads(payloads, sp);
        }
    }
}
/// <summary>
/// Runs <paramref name="queryDefinition"/> against the index and returns the inflated hits, the total
/// hit count and, when facets are requested, the top facet children per requested field.
/// </summary>
/// <remarks>
/// Without term queries every document matches. Otherwise each term query contributes one position
/// to a <see cref="MultiPhraseQuery"/>, whose alternatives are the lower-cased, space-separated words
/// of its value. A value that contains no words is skipped rather than added as an empty position;
/// if no term query contributes any word, the match-all query stays in effect.
/// The whole search runs under the reader side of <c>writerLock</c>.
/// </remarks>
/// <param name="queryDefinition">Term queries, hit limit and facet settings.</param>
/// <param name="cancellationToken">Token passed to the lock acquisition.</param>
/// <returns>The populated <see cref="SearchResult{T}"/>.</returns>
public async Task<SearchResult<T>> SearchAsync(SearchQuery queryDefinition, CancellationToken cancellationToken = default)
{
    using (await writerLock.ReaderLockAsync(cancellationToken))
    {
        var result = new SearchResult<T>();
        List<T> hits = new List<T>();

        using (var writer = getWriter())
        {
            Query query = new MatchAllDocsQuery();

            // Term queries
            if (queryDefinition.TermQueries.Any())
            {
                var phraseQuery = new MultiPhraseQuery();
                var hasTerms = false;

                foreach (var termQuery in queryDefinition.TermQueries)
                {
                    Term[] alternatives = termQuery.value
                        .Split(" ".ToCharArray(), StringSplitOptions.RemoveEmptyEntries)
                        .Select(phrase => new Term(termQuery.field, phrase.ToLower()))
                        .ToArray();

                    // A blank value yields no terms; an empty position cannot be searched.
                    if (alternatives.Length == 0)
                    {
                        continue;
                    }

                    phraseQuery.Add(alternatives);
                    hasTerms = true;
                }

                if (hasTerms)
                {
                    query = phraseQuery;
                }
            }

            // The reader obtained from the writer must be disposed once searching is finished.
            using (var reader = writer.DocsWriter.GetReader(applyAllDeletes: true))
            {
                var searcher = new IndexSearcher(reader);
                var luceneResult = searcher.Search(query, queryDefinition.Limit);

                foreach (var doc in luceneResult.ScoreDocs)
                {
                    var foundDoc = searcher.Doc(doc.Doc);
                    hits.Add(await inflateDocument(foundDoc));
                }

                result.TotalHits = luceneResult.TotalHits;
                result.Hits = hits;

                // Facets
                if (queryDefinition.Facets.Any())
                {
                    FacetsConfig facetsConfig = new FacetsConfig();
                    FacetsCollector fc = new FacetsCollector();
                    FacetsCollector.Search(searcher, query, queryDefinition.FacetMax, fc);

                    using (var taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.Open(Path.Combine(options.IndexPath, indexType, "taxonomy"))))
                    {
                        var facets = new FastTaxonomyFacetCounts(taxonomyReader, facetsConfig, fc);

                        foreach (var facet in queryDefinition.Facets)
                        {
                            var facetGroup = new FacetGroup { Field = facet };
                            facetGroup.Facets = facets.GetTopChildren(queryDefinition.FacetMax, facet).LabelValues
                                .Select(x => new Facet { Key = x.Label, Count = (long)x.Value })
                                .ToArray();
                            result.FacetGroups.Add(facetGroup);
                        }
                    }
                }
            }
        }

        return result;
    }
}
/// <summary>
/// Translates <paramref name="query"/> into span queries and hands each of them to
/// <c>GetPayloads</c> together with <paramref name="payloads"/>.
/// </summary>
/// <remarks>
/// Boolean, filtered and disjunction-max queries are unwrapped recursively (prohibited boolean
/// clauses are skipped); phrase, term and multi-phrase queries are rebuilt as span queries that carry
/// the original boost; span queries are used as they are. Other query types are ignored.
/// </remarks>
/// <param name="query">The query to translate.</param>
/// <param name="payloads">Collection passed on to <c>GetPayloads</c>.</param>
private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
{
    if (query is BooleanQuery booleanQuery)
    {
        foreach (BooleanClause clause in booleanQuery.GetClauses())
        {
            if (clause.IsProhibited)
            {
                continue;
            }

            QueryToSpanQuery(clause.Query, payloads);
        }
    }
    else if (query is PhraseQuery phraseQuery)
    {
        Term[] phraseTerms = phraseQuery.GetTerms();
        SpanQuery[] termSpans = new SpanQuery[phraseTerms.Length];
        for (int t = 0; t < phraseTerms.Length; t++)
        {
            termSpans[t] = new SpanTermQuery(phraseTerms[t]);
        }

        int phraseSlop = phraseQuery.Slop;

        // Only an exact phrase (slop 0) is matched in order.
        SpanNearQuery near = new SpanNearQuery(termSpans, phraseSlop, phraseSlop == 0);
        near.Boost = query.Boost;
        GetPayloads(payloads, near);
    }
    else if (query is TermQuery termQuery)
    {
        SpanTermQuery termSpan = new SpanTermQuery(termQuery.Term);
        termSpan.Boost = query.Boost;
        GetPayloads(payloads, termSpan);
    }
    else if (query is SpanQuery spanQuery)
    {
        GetPayloads(payloads, spanQuery);
    }
    else if (query is FilteredQuery filteredQuery)
    {
        QueryToSpanQuery(filteredQuery.Query, payloads);
    }
    else if (query is DisjunctionMaxQuery disjunctionQuery)
    {
        foreach (var disjunct in disjunctionQuery)
        {
            QueryToSpanQuery(disjunct, payloads);
        }
    }
    else if (query is MultiPhraseQuery multiPhrase)
    {
        IList<Term[]> termArrays = multiPhrase.GetTermArrays();
        int[] positions = multiPhrase.GetPositions();
        if (positions.Length == 0)
        {
            return;
        }

        // Positions are not assumed to be sorted, so scan all of them for the largest.
        int highestPosition = positions[positions.Length - 1];
        foreach (int candidate in positions)
        {
            if (candidate > highestPosition)
            {
                highestPosition = candidate;
            }
        }

        // LUCENENET: SpanQuery (rather than Query) is used as the element type so the lists can be
        // handed to SpanOrQuery directly, without an O(n) cast.
        IList<SpanQuery>[] spansByPosition = new List<SpanQuery>[highestPosition + 1];
        int occupiedPositions = 0;
        for (int i = 0; i < termArrays.Count; ++i)
        {
            Term[] alternatives = termArrays[i];
            int slot = positions[i];
            if (spansByPosition[slot] == null)
            {
                spansByPosition[slot] = new List<SpanQuery>(alternatives.Length);
                ++occupiedPositions;
            }

            IList<SpanQuery> bucket = spansByPosition[slot];
            foreach (Term alternative in alternatives)
            {
                bucket.Add(new SpanTermQuery(alternative));
            }
        }

        int gapCount = 0;
        int nextClause = 0;
        SpanQuery[] positionClauses = new SpanQuery[occupiedPositions];
        foreach (IList<SpanQuery> bucket in spansByPosition)
        {
            if (bucket == null)
            {
                ++gapCount;
                continue;
            }

            positionClauses[nextClause++] = new SpanOrQuery(bucket);
        }

        int multiSlop = multiPhrase.Slop;

        // Unoccupied positions are added to the slop of the resulting near query.
        SpanNearQuery multiNear = new SpanNearQuery(positionClauses, multiSlop + gapCount, multiSlop == 0);
        multiNear.Boost = query.Boost;
        GetPayloads(payloads, multiNear);
    }
}
/// <summary>
/// Click handler that searches the index for the lower-cased text in <c>find_text</c> using several
/// passes and appends every distinct matching document to <c>textBox1</c>.
/// </summary>
/// <remarks>
/// Passes, in order:
/// (1) each word as a single "name" term;
/// (2) the whole text as a single "name" term;
/// (3) a <c>*word*</c> wildcard on "name" for each word;
/// (4) only when one of the words is a number in 1801..9999: for each remaining word, a
///     "wildcard on name OR year equals that number" query.
/// Every pass asks for at most 10 hits; documents are de-duplicated by the string form of their
/// "id" field.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    var query = find_text.ToLower();

    // Blank entries (from repeated spaces) are ignored by every pass, so drop them once here.
    var words = query.Split(' ').Where(w => !String.IsNullOrEmpty(w)).ToList();

    var totalResults = new List<Document>();
    var seenIds = new HashSet<string>();

    // The reader handed out by the writer has to be disposed by the caller.
    using (var reader = writer.GetReader())
    {
        var searcher = new IndexSearcher(reader);

        // Runs one query (top 10) and keeps the documents whose id has not been seen yet.
        void Collect(Query luceneQuery)
        {
            foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                if (seenIds.Add(foundDoc.GetField("id").ToString()))
                {
                    totalResults.Add(foundDoc);
                }
            }
        }

        // Pass 1: search by each single word of the name.
        foreach (var word in words)
        {
            var wordPhrase = new MultiPhraseQuery();
            wordPhrase.Add(new Term("name", word));
            Collect(wordPhrase);
        }

        // Pass 2: search by all words of the name at once.
        var fullPhrase = new MultiPhraseQuery();
        fullPhrase.Add(new Term("name", query));
        Collect(fullPhrase);

        // Pass 3: search by partial words of the name.
        foreach (var word in words)
        {
            Collect(new WildcardQuery(new Term("name", "*" + word + "*")));
        }

        // Pass 4: search by year and (part of) the name. Only a number inside the accepted range
        // counts as a year; a number outside the range must not trigger this pass.
        int? year = null;
        for (int i = 0; i < words.Count; i++)
        {
            if (Int32.TryParse(words[i], out int candidate) && candidate > 1800 && candidate <= 9999)
            {
                year = candidate;
                words.RemoveAt(i);
                break;
            }
        }

        Console.WriteLine(year.HasValue);

        if (year.HasValue)
        {
            int number = year.Value;
            foreach (var word in words)
            {
                BooleanQuery booleanQuery = new BooleanQuery();
                var wild = new WildcardQuery(new Term("name", "*" + word + "*"));
                var num = NumericRangeQuery.NewIntRange("year", 1, number, number, true, true);
                booleanQuery.Add(wild, Occur.SHOULD);
                booleanQuery.Add(num, Occur.SHOULD);
                Collect(booleanQuery);
            }
        }
    }

    foreach (var doc in totalResults)
    {
        textBox1.AppendText(doc.ToString());
    }
}
/// <summary>
/// Populates <paramref name="terms"/> with <see cref="WeightedSpanTerm"/>s derived from
/// <paramref name="query"/>.
/// </summary>
/// <remarks>
/// Known query types are unwrapped or converted to span queries; anything else is rewritten against
/// the leaf reader and, if the rewrite produced a different query, extracted again. Unless a
/// <see cref="MultiTermQuery"/> is skipped because expansion is disabled,
/// <c>ExtractUnknownQuery</c> is called for <paramref name="query"/> at the end in every case.
/// </remarks>
/// <param name="query"><see cref="Query"/> to extract terms from</param>
/// <param name="terms">Map that receives the created <see cref="WeightedSpanTerm"/>s</param>
/// <exception cref="System.IO.IOException">If there is a low-level I/O error</exception>
protected virtual void Extract(Query query, IDictionary<string, WeightedSpanTerm> terms)
{
    switch (query)
    {
        case BooleanQuery booleanQuery:
        {
            foreach (BooleanClause clause in booleanQuery.Clauses)
            {
                if (!clause.IsProhibited)
                {
                    Extract(clause.Query, terms);
                }
            }
            break;
        }

        case PhraseQuery phraseQuery:
        {
            Term[] phraseTerms = phraseQuery.GetTerms();
            SpanQuery[] termSpans = new SpanQuery[phraseTerms.Length];
            for (int t = 0; t < phraseTerms.Length; t++)
            {
                termSpans[t] = new SpanTermQuery(phraseTerms[t]);
            }

            int effectiveSlop = phraseQuery.Slop;
            int[] phrasePositions = phraseQuery.GetPositions();

            // Widen the slop by the largest step between consecutive positions, if it exceeds 1.
            if (phrasePositions.Length > 0)
            {
                int previous = phrasePositions[0];
                int widestStep = 0;
                for (int p = 1; p < phrasePositions.Length; p++)
                {
                    int step = phrasePositions[p] - previous;
                    if (step > widestStep)
                    {
                        widestStep = step;
                    }
                    previous = phrasePositions[p];
                }

                if (widestStep > 1)
                {
                    effectiveSlop += widestStep;
                }
            }

            SpanNearQuery phraseNear = new SpanNearQuery(termSpans, effectiveSlop, effectiveSlop == 0);
            phraseNear.Boost = query.Boost;
            ExtractWeightedSpanTerms(terms, phraseNear);
            break;
        }

        case TermQuery _:
        {
            ExtractWeightedTerms(terms, query);
            break;
        }

        case SpanQuery spanQuery:
        {
            ExtractWeightedSpanTerms(terms, spanQuery);
            break;
        }

        case FilteredQuery filteredQuery:
        {
            Extract(filteredQuery.Query, terms);
            break;
        }

        case ConstantScoreQuery constantScoreQuery:
        {
            Query wrapped = constantScoreQuery.Query;
            if (wrapped != null)
            {
                Extract(wrapped, terms);
            }
            break;
        }

        case CommonTermsQuery _:
        {
            // Handled specially because rewriting would change the resulting query;
            // this query is TermContext sensitive.
            ExtractWeightedTerms(terms, query);
            break;
        }

        case DisjunctionMaxQuery disjunctionQuery:
        {
            foreach (var disjunct in disjunctionQuery)
            {
                Extract(disjunct, terms);
            }
            break;
        }

        case MultiPhraseQuery multiPhrase:
        {
            IList<Term[]> termArrays = multiPhrase.GetTermArrays();
            int[] positions = multiPhrase.GetPositions();
            if (positions.Length > 0)
            {
                // Positions are not assumed to be sorted, so scan all of them for the largest.
                int highestPosition = positions[positions.Length - 1];
                foreach (int candidate in positions)
                {
                    if (candidate > highestPosition)
                    {
                        highestPosition = candidate;
                    }
                }

                // One (possibly null) list of alternative span queries per position.
                var spansByPosition = new List<SpanQuery>[highestPosition + 1];
                int occupiedPositions = 0;
                for (int i = 0; i < termArrays.Count; ++i)
                {
                    Term[] alternatives = termArrays[i];
                    int slot = positions[i];
                    if (spansByPosition[slot] == null)
                    {
                        spansByPosition[slot] = new List<SpanQuery>(alternatives.Length);
                        ++occupiedPositions;
                    }

                    List<SpanQuery> bucket = spansByPosition[slot];
                    foreach (var alternative in alternatives)
                    {
                        bucket.Add(new SpanTermQuery(alternative));
                    }
                }

                int gapCount = 0;
                int nextClause = 0;
                SpanQuery[] positionClauses = new SpanQuery[occupiedPositions];
                foreach (var bucket in spansByPosition)
                {
                    if (bucket == null)
                    {
                        ++gapCount;
                        continue;
                    }

                    positionClauses[nextClause++] = new SpanOrQuery(bucket.ToArray());
                }

                int multiSlop = multiPhrase.Slop;

                // Unoccupied positions are added to the slop of the resulting near query.
                SpanNearQuery multiNear = new SpanNearQuery(positionClauses, multiSlop + gapCount, multiSlop == 0);
                multiNear.Boost = query.Boost;
                ExtractWeightedSpanTerms(terms, multiNear);
            }
            break;
        }

        default:
        {
            Query toRewrite = query;
            if (query is MultiTermQuery)
            {
                if (!expandMultiTermQuery)
                {
                    return;
                }

                MultiTermQuery scoringCopy = (MultiTermQuery)query.Clone();
                scoringCopy.MultiTermRewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                toRewrite = scoringCopy;
            }

            IndexReader reader = GetLeafContext().Reader;
            Query rewritten = toRewrite.Rewrite(reader);
            if (rewritten != toRewrite)
            {
                // Rewrite only once and then flatten again: the rewritten query may get special
                // treatment if this method is overridden in a subclass, or in the next recursion.
                Extract(rewritten, terms);
            }
            break;
        }
    }

    ExtractUnknownQuery(query, terms);
}
/// <summary>
/// Searches with a <see cref="MultiPhraseQuery"/> of ("wordx" or "wordb") followed by "wordy" and
/// asserts the highlight count expected for up to 2 fragments (6).
/// </summary>
public void TestQueryScorerMultiPhraseQueryHighlighting()
{
    const int maxNumFragmentsRequired = 2;
    const int expectedHighlights = 6;

    Term[] firstPositionAlternatives =
    {
        new Term(FIELD_NAME, "wordx"),
        new Term(FIELD_NAME, "wordb"),
    };

    MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
    multiPhrase.Add(firstPositionAlternatives);
    multiPhrase.Add(new Term(FIELD_NAME, "wordy"));

    DoSearching(multiPhrase);

    AssertExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights);
}
/// <summary>
/// Searches the index in several passes and yields the name and year of up to 10 distinct matching
/// documents. Based on the examples at https://lucenenet.apache.org
/// </summary>
/// <remarks>
/// Passes, in order:
/// (1) each word as a single "name_word" term;
/// (2) the whole text as a single "full_name" term;
/// (3) a <c>*word*</c> wildcard on "name_word" for each word;
/// (4) when one of the words parses as a non-zero integer: for each remaining word, a
///     "wildcard on name_word OR year equals that integer" query.
/// Every pass asks for at most 10 hits; documents are de-duplicated by their "id" field.
/// The index reader is opened for the searches and disposed before any result is yielded.
/// </remarks>
/// <param name="query">Search text; split on single spaces.</param>
/// <returns>Pairs of the first "full_name" value and the "year" value of each kept document.</returns>
public IEnumerable<(string name, int year)> SearchWithLucy(string query)
{
    var words = query.Split(' ').ToList();
    var totalResults = new List<Document>();
    var seenIds = new HashSet<int?>();

    // The reader handed out by the writer has to be disposed by the caller.
    using (var reader = lucyAdapter.lucyWriter.GetReader(applyAllDeletes: true))
    {
        var searcher = new IndexSearcher(reader);

        // Runs one query (top 10) and keeps the documents whose id has not been seen yet.
        void Collect(Query luceneQuery)
        {
            foreach (var scoreDoc in searcher.Search(luceneQuery, 10).ScoreDocs)
            {
                var document = searcher.Doc(scoreDoc.Doc);
                if (seenIds.Add(document.GetField("id").GetInt32Value()))
                {
                    totalResults.Add(document);
                }
            }
        }

        // Pass 1: single words.
        foreach (var word in words)
        {
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            var wordQuery = new MultiPhraseQuery();
            wordQuery.Add(new Term("name_word", word));
            Collect(wordQuery);
        }

        // Pass 2: full name.
        var fullNameQuery = new MultiPhraseQuery();
        fullNameQuery.Add(new Term("full_name", query));
        Collect(fullNameQuery);

        // Pass 3: word parts.
        foreach (var word in words)
        {
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            Collect(new WildcardQuery(new Term("name_word", "*" + word + "*")));
        }

        // Pass 4: year and word part. The first word that parses as an integer is taken as the
        // year and removed from the word list.
        var number = 0;
        for (var i = 0; i < words.Count; i++)
        {
            if (int.TryParse(words[i], out number))
            {
                words.RemoveAt(i);
                break;
            }
        }

        if (number != 0)
        {
            foreach (var word in words)
            {
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                var booleanQuery = new BooleanQuery();
                var wildcardQuery = new WildcardQuery(new Term("name_word", "*" + word + "*"));
                var rangeQuery = NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true);
                booleanQuery.Add(wildcardQuery, Occur.SHOULD);
                booleanQuery.Add(rangeQuery, Occur.SHOULD);
                Collect(booleanQuery);
            }
        }
    }

    foreach (var doc in totalResults.Take(10))
    {
        yield return (doc.GetValues("full_name")[0], (int)doc.GetField("year").GetInt32Value());
    }
}
/// <summary>
/// Searches with a <see cref="MultiPhraseQuery"/> that has "wordz" at position 2 and "wordx" at
/// position 0 (added in that order, leaving position 1 empty) and asserts the highlight count
/// expected for 1 fragment (2).
/// </summary>
public void TestQueryScorerMultiPhraseQueryHighlightingWithGap()
{
    const int maxNumFragmentsRequired = 1;
    const int expectedHighlights = 2;

    Term[] termsAtPositionTwo = { new Term(FIELD_NAME, "wordz") };
    Term[] termsAtPositionZero = { new Term(FIELD_NAME, "wordx") };

    // The string form of MultiPhraseQuery does not cope well with these out-of-order
    // additions, but the query itself seems to match accurately.
    MultiPhraseQuery multiPhrase = new MultiPhraseQuery();
    multiPhrase.Add(termsAtPositionTwo, 2);
    multiPhrase.Add(termsAtPositionZero, 0);

    DoSearching(multiPhrase);

    AssertExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights);
}
/// <summary>
/// Minimal end-to-end Lucene example: creates an on-disk index under the common application data
/// folder, adds one document, searches its "favoritePhrase" field for "brown" followed by "fox" and
/// prints the hits as a table to the console.
/// </summary>
/// <remarks>
/// Any index directory left over from a previous run is removed first, so the example always starts
/// from an empty index. The method only prints; it makes no assertions.
/// </remarks>
public void LuceneMinimalExampleTest()
{
    // 1) Create an index and define a text analyzer ------------------------

    // Ensures index backward compatibility
    const LuceneVersion AppLuceneVersion = LuceneVersion.LUCENE_48;

    // Construct a machine-independent path for the index
    var basePath = Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData);
    var indexPath = Path.Combine(basePath, "index");

    // Start from a clean slate. The single-argument Directory.Delete throws both when the
    // directory does not exist (first run) and when it still contains index files (later runs),
    // so check for existence and delete recursively.
    if (System.IO.Directory.Exists(indexPath))
    {
        System.IO.Directory.Delete(indexPath, recursive: true);
    }

    using var dir = FSDirectory.Open(indexPath);

    // Create an analyzer to process the text
    var analyzer = new StandardAnalyzer(AppLuceneVersion);

    // Create an index writer
    var indexConfig = new IndexWriterConfig(AppLuceneVersion, analyzer);
    using var writer = new IndexWriter(dir, indexConfig);

    // 2) Add to the index ------------------------
    var source = new
    {
        Name = "Kermit the Frog",
        FavoritePhrase = "The quick brown fox jumps over the lazy dog"
    };
    var doc = new Document
    {
        // StringField indexes but doesn't tokenize
        new StringField("name", source.Name, Field.Store.YES),
        new TextField("favoritePhrase", source.FavoritePhrase, Field.Store.YES)
    };

    writer.AddDocument(doc);
    writer.Flush(triggerMerge: false, applyAllDeletes: false);

    // 3) Construct a query ------------------------

    // Search with a phrase
    var phrase = new MultiPhraseQuery
    {
        new Term("favoritePhrase", "brown"),
        new Term("favoritePhrase", "fox")
    };

    // 4) Fetch the results ------------------------

    // Re-use the writer to get real-time updates
    using var reader = writer.GetReader(applyAllDeletes: true);
    var searcher = new IndexSearcher(reader);
    ScoreDoc[] hits = searcher.Search(phrase, 20 /* top 20 */).ScoreDocs;

    // Display the output in a table
    string strDisp = $"{"Score",10}" + $" {"Name",-15}" + $" {"Favorite Phrase",-40}";
    Console.WriteLine(strDisp);

    foreach (var hit in hits)
    {
        var foundDoc = searcher.Doc(hit.Doc);
        string str = $"{hit.Score:f8}" + $" {foundDoc.Get("name"),-15}" + $" {foundDoc.Get("favoritePhrase"),-40}";
        Console.WriteLine(str);
    }
}
/// <summary>
/// Click handler for the Lucene search button: runs several search passes over the text typed into
/// <c>search_field</c> and adds one row to the <c>results</c> grid for every hit of every pass.
/// </summary>
/// <remarks>
/// Passes, in order:
/// (1) each word as a single "name_word" term;
/// (2) the whole lower-cased text as a single "full_name" term;
/// (3) a <c>*word*</c> wildcard on "name_word" for each word;
/// (4) only when one of the words is a number in 1801..9999: for each remaining word, a
///     "wildcard on name_word OR year equals that number" query.
/// Every pass asks for at most 10 hits. Hits are not de-duplicated, so a document found by several
/// passes is listed several times.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void lucene_serach_Click(object sender, EventArgs e)
{
    this.results.Rows.Clear();

    var query = this.search_field.Text.ToLower();

    // Blank entries (from repeated spaces) are ignored by every pass, so drop them once here.
    var words = query.Split(' ').Where(w => !String.IsNullOrEmpty(w)).ToList();

    // The reader handed out by the writer has to be disposed by the caller.
    using (var reader = writer1.GetReader(applyAllDeletes: true))
    {
        var searcher = new IndexSearcher(reader);

        // Runs one query (top 10) and adds a grid row (id, full name, year) for each hit.
        void AddRows(Query luceneQuery)
        {
            foreach (var hit in searcher.Search(luceneQuery, 10).ScoreDocs)
            {
                var foundDoc = searcher.Doc(hit.Doc);
                this.results.Rows.Add(
                    foundDoc.GetField("id").GetInt32Value().ToString(),
                    foundDoc.GetValues("full_name")[0],
                    foundDoc.GetField("year").GetInt32Value().ToString());
            }
        }

        // Pass 1: single word.
        foreach (var word in words)
        {
            var wordPhrase = new MultiPhraseQuery();
            wordPhrase.Add(new Term("name_word", word));
            AddRows(wordPhrase);
        }

        // Pass 2: full name.
        var fullPhrase = new MultiPhraseQuery();
        fullPhrase.Add(new Term("full_name", query));
        AddRows(fullPhrase);

        // Pass 3: parts of words.
        foreach (var word in words)
        {
            AddRows(new WildcardQuery(new Term("name_word", "*" + word + "*")));
        }

        // Pass 4: year plus part of a word. Only a number inside the accepted range counts as a
        // year; a number outside the range must not trigger this pass.
        int? year = null;
        for (int i = 0; i < words.Count; i++)
        {
            if (Int32.TryParse(words[i], out int candidate) && candidate > 1800 && candidate <= 9999)
            {
                year = candidate;
                words.RemoveAt(i);
                break;
            }
        }

        Console.WriteLine(year.HasValue);

        if (year.HasValue)
        {
            int number = year.Value;
            foreach (var word in words)
            {
                BooleanQuery booleanQuery = new BooleanQuery();
                var wild = new WildcardQuery(new Term("name_word", "*" + word + "*"));
                var num = NumericRangeQuery.NewInt32Range("year", 1, number, number, true, true);
                booleanQuery.Add(wild, Occur.SHOULD);
                booleanQuery.Add(num, Occur.SHOULD);
                AddRows(booleanQuery);
            }
        }
    }
}